http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
deleted file mode 100644
index 0827c45..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q01.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Filter Query :)
-(: Find all readings for hurricane force wind warning or extreme wind warning.
:)
-(: The warnings occur when the wind speed (AWND) exceeds 110 mph (49.1744     
:)
-(: meters per second). (Wind value is in tenths of a meter per second)        
:)
-let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($collection)/dataCollection/data
-where $r/dataType eq "AWND" and xs:decimal(fn:data($r/value)) gt 491.744
-return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
deleted file mode 100644
index 0635618..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q02.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-(:
-XQuery Aggregate Query
-----------------------
-Find the annual precipitation (PRCP) for Syracuse, NY using the airport
-weather station (USW00014771) report for 1999.                                 
    
-:)
-fn:sum(
-    let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($collection)/dataCollection/data
-    where $r/station eq "GHCND:USW00014771" 
-        and $r/dataType eq "PRCP" 
-        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 1999
-    return $r/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
deleted file mode 100644
index c58b0a3..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q03.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Aggregate Query :)
-(: Find the highest recorded temperature (TMAX) in Celsius.                   
:)
-fn:max(
-    let $collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($collection)/dataCollection/data
-    where $r/dataType eq "TMAX"
-    return $r/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
deleted file mode 100644
index 5b7246d..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Find all the weather readings for Washington state for a specific day    :)
-(: 1976/7/4.                                                                  
:)
-let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-for $s in collection($station_collection)/stationCollection/station
-
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-    
-where $s/id eq $r/station 
-    and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and 
fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
-    and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000")
-return $r
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
deleted file mode 100644
index 6c7810a..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_sensor.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather sensor readings on 1976-07-04.                       
:)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-        
-    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
-    where $date eq xs:date("1976-07-04")
-    return $r
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
deleted file mode 100644
index 18e627a..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_station.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather stations for Washington state.                       
:)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    where (some $x in $s/locationLabels satisfies ($x/type eq "ST" and 
fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON"))
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
deleted file mode 100644
index c95f3f5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Find the lowest recorded temperature (TMIN) in the United States for     :)
-(: 2001.                                                                      
:)
-fn:min(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    
-    where $s/id eq $r/station
-        and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and 
$x/id eq "FIPS:US"))
-        and $r/dataType eq "TMIN" 
-        and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001
-    return $r/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
deleted file mode 100644
index 8548742..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_sensor.xq
+++ /dev/null
@@ -1,28 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Count all sensor readings for TMIN in 2001.                                
:)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    
-    let $date := xs:date(fn:substring(xs:string(fn:data($r/date)), 0, 11))
-    where $r/dataType eq "TMIN" 
-        and fn:year-from-date($date) eq 2001
-    return $r/value
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
deleted file mode 100644
index 6f3a6b8..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_station.xq
+++ /dev/null
@@ -1,25 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Aggregate Query :)
-(: Count all stations in the United States.                                 :)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    where (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and 
$x/id eq "FIPS:US"))
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
deleted file mode 100644
index 5c8ed54..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06.xq
+++ /dev/null
@@ -1,30 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Find the highest recorded temperature (TMAX) for each station for each     
:)
-(: day over the year 2000.                                                    
:)
-let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-for $s in collection($station_collection)/stationCollection/station
-
-let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-for $r in collection($sensor_collection)/dataCollection/data
-
-where $s/id eq $r/station
-    and $r/dataType eq "TMAX" 
-    and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
-return ($s/displayName, $r/date, $r/value)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
deleted file mode 100644
index 1938151..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_sensor.xq
+++ /dev/null
@@ -1,27 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count max temperature (TMAX) readings for the year 2000.                    
   :)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    
-    where $r/dataType eq "TMAX" 
-       and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000
-    return $r
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
deleted file mode 100644
index 3c1dc98..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_station.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the stations.                                         :)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
deleted file mode 100644
index 5b1f2ac..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07.xq
+++ /dev/null
@@ -1,33 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Self Join Query :)
-(: Self join with all stations finding the difference in min and max       :)
-(: temperature and get the average.                                        :)
-fn:avg(
-    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_min in collection($sensor_collection_min)/dataCollection/data
-    
-    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_max in collection($sensor_collection_max)/dataCollection/data
-    
-    where $r_min/station eq $r_max/station
-        and $r_min/date eq $r_max/date
-        and $r_min/dataType eq "TMIN"
-        and $r_max/dataType eq "TMAX"
-    return $r_max/value - $r_min/value
-) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
deleted file mode 100644
index a48cad5..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmax.xq
+++ /dev/null
@@ -1,26 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the records for TMAX.                                            
:)
-count(
-    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_max in collection($sensor_collection_max)/dataCollection/data
-    
-    where $r_max/dataType eq "TMAX"
-    return $r_max
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
deleted file mode 100644
index 4a72d0f..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_tmin.xq
+++ /dev/null
@@ -1,26 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the records for TMIN.                                            
:)
-count(
-    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r_min in collection($sensor_collection_min)/dataCollection/data
-    
-    where $r_min/dataType eq "TMIN"
-    return $r_min
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
deleted file mode 100644
index 6fa981b..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/sensor_count.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather sensor readings available.                           
:)
-count(
-    let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
-    for $r in collection($sensor_collection)/dataCollection/data
-    return $r
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
deleted file mode 100644
index 1958ec6..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/station_count.xq
+++ /dev/null
@@ -1,24 +0,0 @@
-(: Licensed to the Apache Software Foundation (ASF) under one
-   or more contributor license agreements.  See the NOTICE file
-   distributed with this work for additional information
-   regarding copyright ownership.  The ASF licenses this file
-   to you under the Apache License, Version 2.0 (the
-   "License"); you may not use this file except in compliance
-   with the License.  You may obtain a copy of the License at
-   
-     http://www.apache.org/licenses/LICENSE-2.0
-   
-   Unless required by applicable law or agreed to in writing,
-   software distributed under the License is distributed on an
-   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-   KIND, either express or implied.  See the License for the
-   specific language governing permissions and limitations
-   under the License. :)
-
-(: XQuery Join Query :)
-(: Count all the weather stations available.                                  
:)
-count(
-    let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations"
-    for $s in collection($station_collection)/stationCollection/station
-    return $s
-)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
deleted file mode 100644
index 58bea51..0000000
--- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/README.md
+++ /dev/null
@@ -1,51 +0,0 @@
-<!--
-  Licensed to the Apache Software Foundation (ASF) under one or more
-  contributor license agreements.  See the NOTICE file distributed with
-  this work for additional information regarding copyright ownership.
-  The ASF licenses this file to You under the Apache License, Version 2.0
-  (the "License"); you may not use this file except in compliance with
-  the License.  You may obtain a copy of the License at
-
-      http://www.apache.org/licenses/LICENSE-2.0
-
-  Unless required by applicable law or agreed to in writing, software
-  distributed under the License is distributed on an "AS IS" BASIS,
-  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-  See the License for the specific language governing permissions and
-  limitations under the License.
--->
-
-Weather Data Conversion To XML
-=====================
-
-# Introduction
-
-The NOAA hosts DAILY GLOBAL HISTORICAL CLIMATOLOGY NETWORK (GHCN-DAILY)
-.dat files. Weather.gov has an RSS/XML feed that gives current weather sensor
-readings. Using the RSS feed as a template, the GHCN-DAILY historical
-information is used to generate past RSS feed XML documents. The process allows
-testing on a large set of information without having to continually monitor
-the weather.gov site for all the weather details for years.
-
-# Detailed Description
-
-Detailed GHCN-DAILY information:
-<http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt>
-
-The process takes a save folder for the data. The folder contains several
-folders:
-
- - all_xml_files (The generated xml files for a given package)
- - downloads (All files taken from the NOAA HTTP site)
- - dataset-[name] (all files related to a single dataset)
-     
-     
-# Example commands
-
-Building
-
-
-Partitioning
-python weather_cli.py -x weather_example.xml
-
-Linking
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
deleted file mode 100755
index 632dbcb..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ /dev/null
@@ -1,67 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Run all the queries (files matching *q??.xq) and save a log for each one.
-# First argument: Supply the folder which houses all the queries (recursive).
-# Second argument: (optional) adds options to the VXQuery CLI.
-# Third argument: (optional) only run queries whose path contains this text.
-#
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138"
-# run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
-#
-REPEAT=5
-FRAME_SIZE=$((8*1024))
-BUFFER_SIZE=$((32*1024*1024))
-JOIN_HASH_SIZE=-1
-
-if [ -z "${1}" ]
-then
-    echo "Please supply a directory for query files to be found."
-    # Exit non-zero so a calling script can detect the usage error.
-    exit 1
-fi
-
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
-
-# NOTE: the find output is word-split, so query paths must not contain whitespace.
-for j in $(find "${1}" -name '*q??.xq')
-do
-    if [ -z "${3}" ] || [[ "${j}" =~ "${3}" ]]
-    then
-        date
-        echo "Running query: ${j}"
-        log_file="$(basename "${j}").$(date +%Y%m%d%H%M).log"
-        # Logs go to a parallel tree: the first "queries" in the path becomes "query_logs".
-        log_base_path=$(dirname "${j/queries/query_logs}")
-        mkdir -p "${log_base_path}"
-        # ${2} is intentionally unquoted so it splits into separate CLI options.
-        time sh ./vxquery-cli/target/appassembler/bin/vxq "${j}" ${2} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > "${log_base_path}/${log_file}" 2>&1
-        # printf is used because bash's builtin echo prints "\n" literally by default.
-        printf '\nBuffer Size: %s\n' "${BUFFER_SIZE}" >> "${log_base_path}/${log_file}"
-        printf '\nFrame Size: %s\n' "${FRAME_SIZE}" >> "${log_base_path}/${log_file}"
-        printf '\nJoin Hash Size: %s\n' "${JOIN_HASH_SIZE}" >> "${log_base_path}/${log_file}"
-    fi
-done
-
-# Check for the command that is actually used to send the notification.
-if command -v mail >/dev/null 2>&1
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="Benchmark Tests Finished"
-    EMAIL="[email protected]"
-    # The closing EOM must start in the first column or the here-document never ends.
-    mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all tests in folder ${1}.
-EOM
-else
-    echo "No mail command to use."
-fi

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
deleted file mode 100755
index 98ab04b..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ /dev/null
@@ -1,90 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Start the cluster, run all the queries for that cluster size, save a log
-# for each query and stop the cluster again.
-# First argument: Supply the folder which houses all the queries (recursive).
-# Second argument: the number of nodes (start at 0); selects the
-#                  ${2}nodes.xml cluster config and the "${2}nodes" queries.
-# Third argument: (optional) adds options to the VXQuery CLI.
-# Fourth argument: (optional) only run queries whose path contains this text.
-#
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/speed_up/queries/ 2
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/speed_up/queries/ 2 "-client-net-ip-address 169.235.27.138"
-# run_benchmark_cluster.sh ./noaa-ghcn-daily/benchmarks/speed_up/queries/ 2 "" q03
-#
-CLUSTER="uci"
-REPEAT=5
-FRAME_SIZE=$((8*1024))
-BUFFER_SIZE=$((32*1024*1024))
-#JOIN_HASH_SIZE=$((256*1024*1024))
-JOIN_HASH_SIZE=-1
-
-if [ -z "${1}" ]
-then
-    echo "Please supply a directory for query files to be found."
-    # Exit non-zero so a calling script can detect the usage error.
-    exit 1
-fi
-
-if [ -z "${2}" ]
-then
-    echo "Please supply the number of nodes (start at 0)."
-    exit 1
-fi
-
-# Run queries for the specified number of nodes.
-echo "Starting ${2} cluster nodes"
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c "vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml" -a start
-
-# wait for cluster to finish setting up
-sleep 5
-
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
-
-# NOTE: the find output is word-split, so query paths must not contain whitespace.
-for j in $(find "${1}" -name '*q??.xq')
-do
-    # Only work with the queries generated for this number of nodes.
-    if [[ "${j}" =~ "${2}nodes" ]]
-    then
-        # Only run for specified queries.
-        if [ -z "${4}" ] || [[ "${j}" =~ "${4}" ]]
-        then
-            date
-            echo "Running query: ${j}"
-            log_file="$(basename "${j}").$(date +%Y%m%d%H%M).log"
-            # Logs go to a parallel tree: the first "queries" in the path becomes "query_logs".
-            log_base_path=$(dirname "${j/queries/query_logs}")
-            mkdir -p "${log_base_path}"
-            # ${3} is intentionally unquoted so it splits into separate CLI options.
-            time sh ./vxquery-cli/target/appassembler/bin/vxq "${j}" ${3} -timing -showquery -showoet -showrp -frame-size ${FRAME_SIZE} -buffer-size ${BUFFER_SIZE} -join-hash-size ${JOIN_HASH_SIZE} -repeatexec ${REPEAT} > "${log_base_path}/${log_file}" 2>&1
-            # printf is used because bash's builtin echo prints "\n" literally by default.
-            printf '\nBuffer Size: %s\n' "${BUFFER_SIZE}" >> "${log_base_path}/${log_file}"
-            printf '\nFrame Size: %s\n' "${FRAME_SIZE}" >> "${log_base_path}/${log_file}"
-            printf '\nJoin Hash Size: %s\n' "${JOIN_HASH_SIZE}" >> "${log_base_path}/${log_file}"
-        fi
-    fi
-done
-
-# Stop cluster.
-python vxquery-server/src/main/resources/scripts/cluster_cli.py -c "vxquery-server/src/main/resources/conf/${CLUSTER}/${2}nodes.xml" -a stop
-
-# Check for the command that is actually used to send the notification.
-if command -v mail >/dev/null 2>&1
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="Benchmark Cluster Tests Finished"
-    EMAIL="[email protected]"
-    # The closing EOM must start in the first column or the here-document never ends.
-    mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all tests in folder ${1} for a ${2} node cluster using ${HOSTNAME}.
-EOM
-else
-    echo "No mail command to use."
-fi

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
deleted file mode 100755
index 58976b7..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_group_test.sh
+++ /dev/null
@@ -1,51 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Run the cluster benchmark for a predefined group of node counts.
-# First argument: IP address handed to the VXQuery CLI (-client-net-ip-address).
-# Second argument: base weather folder that contains the ${DATASET} folder.
-DATASET="dataset-hcn-d2"
-cluster_ip=${1}
-base_weather_folder=${2}
-
-if [ -z "${cluster_ip}" ] || [ -z "${base_weather_folder}" ]
-then
-    echo "Usage: run_group_test.sh <cluster ip> <base weather folder>"
-    exit 1
-fi
-
-for n in 7 6 5 3 4 2 1 0
-do
-    #for t in "batch_scale_out" "speed_up"
-    for t in "batch_scale_out"
-    #for t in "speed_up"
-    do
-        for p in 2
-        do
-            for c in 4
-            do
-                echo " ==== node ${n} test ${t} partition ${p} cores ${c} ===="
-                # run_benchmark_cluster.sh uses bash-only syntax ([[ ]], ${var/a/b}),
-                # so it is started with bash instead of sh.
-                bash vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh "${base_weather_folder}/${DATASET}/queries/${t}/${n}nodes/d2_p${p}/" ${n} "-client-net-ip-address ${cluster_ip} -available-processors ${c}"
-            done
-        done
-    done
-done
-
-# Check for the command that is actually used to send the notification.
-if command -v mail >/dev/null 2>&1
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="Benchmark Group Tests Finished"
-    EMAIL="[email protected]"
-    # The closing EOM must start in the first column or the here-document never ends.
-    mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all tests in the predefined group for ${DATASET}.
-EOM
-else
-    echo "No mail command to use."
-fi
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
deleted file mode 100755
index a6788be..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_mrql_tests.sh
+++ /dev/null
@@ -1,42 +0,0 @@
-#!/bin/bash
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#      http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-# Run the MRQL queries q00..q07 and append their output to one log per query.
-# First argument: (optional) how many times each query is executed; default 1.
-export JAVA_HOME=/home/ecarm002/java/jdk1.6.0_45
-REPEAT=${1:-1}
-DATASET="hcn"
-LOG_FOLDER="weather_data/mrql/query_logs/${DATASET}"
-
-# The ">>" redirection below fails if the log folder does not exist yet.
-mkdir -p "${LOG_FOLDER}"
-
-for n in `seq 0 7`
-#for n in 0
-do
-    date
-    echo "Running q0${n} on ${DATASET} for MRQL."
-    # seq is used because bash does not expand variables inside {1..N}:
-    # brace expansion runs before parameter expansion.
-    time for i in $(seq 1 "${REPEAT}"); do ~/mrql/incubator-mrql/bin/mrql -dist -nodes 5 ~/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_${DATASET}/q0${n}.mrql >> "${LOG_FOLDER}/q0${n}.mrql.log" 2>&1; done
-done
-
-# Check for the command that is actually used to send the notification.
-if command -v mail >/dev/null 2>&1
-then
-    echo "Sending out e-mail notification."
-    SUBJECT="MRQL Tests Finished (${DATASET})"
-    EMAIL="[email protected]"
-    # The closing EOM must start in the first column or the here-document never ends.
-    mail -s "${SUBJECT}" "${EMAIL}" <<EOM
-    Completed all MRQL tests on ${DATASET}.
-EOM
-else
-    echo "No mail command to use."
-fi
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
deleted file mode 100644
index 8021b2c..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ /dev/null
@@ -1,377 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import os.path
-import linecache
-import distutils.core
-import fileinput
-import socket
-
-from weather_config import *
-from weather_data_files import *
-
-# Weather data files created to manage the conversion process.
-# Allows partition and picking up where you left off.
-#
-# benchmark_name/
-#   data/
-#   queries/
-#   logs/
-class WeatherBenchmark:
-    '''Prints partition schemes, builds data links and copies query files
-    for every test listed by the dataset configuration.
-
-    The code targets Python 2 (it uses print statements).
-    '''
-
-    # Folder name, appended to a base path, that holds the symbolic links.
-    DATA_LINKS_FOLDER = "data_links/"
-    LARGE_FILE_ROOT_TAG = WeatherDataFiles.LARGE_FILE_ROOT_TAG
-    # Placeholder collection path in the master query files; it is swapped
-    # for the real link paths in copy_and_replace_query().
-    QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/"
-    QUERY_MASTER_FOLDER = "../queries/"
-    QUERY_FILE_LIST = [
-                       "q00.xq",
-                       "q01.xq",
-                       "q02.xq",
-                       "q03.xq",
-                       "q04.xq",
-                       "q05.xq",
-                       "q06.xq",
-                       "q07.xq"
-                       ]
-    QUERY_UTILITY_LIST = [
-                          "no_result.xq",
-                          "sensor_count.xq",
-                          "station_count.xq",
-                          "q04_sensor.xq",
-                          "q04_station.xq",
-                          "q05_sensor.xq",
-                          "q05_station.xq",
-                          "q06_sensor.xq",
-                          "q06_station.xq",
-                          "q07_tmin.xq",
-                          "q07_tmax.xq",
-                          ]
-    BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"]
-    BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"]
-    QUERY_COLLECTIONS = ["sensors", "stations"]
-
-    # Joins several collection paths into the one string written into a query.
-    SEPERATOR = "|"
-
-    def __init__(self, base_paths, partitions, dataset, nodes):
-        '''Store the configuration used by all other methods.
-
-        base_paths -- list of base folders (reported as "Disks" when printing).
-        partitions -- list of partition counts to generate.
-        dataset    -- object providing get_tests() and get_partition_type().
-        nodes      -- list of cluster nodes; each provides get_node_name().
-        '''
-        self.base_paths = base_paths
-        self.partitions = partitions
-        self.dataset = dataset
-        self.nodes = nodes
-
-    def print_partition_scheme(self):
-        '''Print the partition scheme of every configured test.'''
-        if (len(self.base_paths) == 0):
-            return
-        for test in self.dataset.get_tests():
-            if test in self.BENCHMARK_LOCAL_TESTS:
-                self.print_local_partition_schemes(test)
-            elif test in self.BENCHMARK_CLUSTER_TESTS:
-                self.print_cluster_partition_schemes(test)
-            else:
-                print "Unknown test."
-                exit()
-
-    def print_local_partition_schemes(self, test):
-        '''Print the scheme of a local test for each partition count.'''
-        node_index = 0
-        virtual_disk_partitions = get_local_virtual_disk_partitions(self.partitions)
-        for p in self.partitions:
-            scheme = self.get_local_partition_scheme(test, p)
-            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
-
-    def print_cluster_partition_schemes(self, test):
-        '''Print the scheme of a cluster test for each partition count.'''
-        node_index = self.get_current_node_index()
-        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
-        for p in self.partitions:
-            scheme = self.get_cluster_partition_scheme(test, p)
-            self.print_partition_schemes(virtual_disk_partitions, scheme, test, p, node_index)
-
-    def print_partition_schemes(self, virtual_partitions, scheme, test, partitions, node_id):
-        '''Print a summary followed by one table row per scheme entry.'''
-        print
-        print "---------------- Partition Scheme --------------------"
-        print "    Test: " + test
-        print "    Virtual Partitions: " + str(virtual_partitions)
-        print "    Disks: " + str(len(self.base_paths))
-        print "    Partitions: " + str(partitions)
-        print "    Node Id: " + str(node_id)
-
-        if isinstance(scheme, (tuple, list, dict, set)) and len(scheme) > 0:
-            # Both path columns are sized from the first row's data path.
-            folder_length = len(scheme[0][3]) + 5
-            row_format = "{:>5} {:>5} {:>5} {:<" + str(folder_length) + "} {:<" + str(folder_length) + "}"
-            HEADER = ("Disk", "Index", "Link", "Data Path", "Link Path")
-            print row_format.format(*HEADER)
-            for row in scheme:
-                print row_format.format(*row)
-            print
-        else:
-            print "    Scheme is EMPTY."
-
-    def get_local_partition_scheme(self, test, partition):
-        '''Return rows [data_disk, data_index, link_index, data_path, link_path]
-        that pair real data paths with link paths for a local test.
-
-        The partition argument is accepted but not used by this method.
-        '''
-        scheme = []
-        virtual_partitions = get_local_virtual_disk_partitions(self.partitions)
-        data_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths)
-        link_base_schemes = get_partition_scheme(0, virtual_partitions, self.base_paths, self.DATA_LINKS_FOLDER + test)
-
-        # Match link paths to real data paths.
-        group_size = len(data_schemes) / len(link_base_schemes)
-        for d in range(len(self.base_paths)):
-            offset = 0
-            for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-                if d == link_disk:
-                    # Only consider a single disk at a time.
-                    for data_node, data_disk, data_virtual, data_index, data_path in data_schemes:
-                        # local_speed_up: each link takes the data indexes in
-                        # [offset, offset + group_size) of its own disk.
-                        if test == "local_speed_up" and data_disk == link_disk \
-                                and offset <= data_index and data_index < offset + group_size:
-                            scheme.append([data_disk, data_index, link_index, data_path, link_path])
-                        # local_batch_scale_out: a link takes the data with its own index.
-                        elif test == "local_batch_scale_out" and data_disk == link_disk \
-                                and data_index == link_index:
-                            scheme.append([data_disk, data_index, link_index, data_path, link_path])
-                    offset += group_size
-        return scheme
-
-    def get_cluster_partition_scheme(self, test, partition):
-        '''Return rows [link_disk, data_index, link_index, data_path, link_path]
-        for the node this process runs on.
-
-        Returns None (after printing a message) when the host is not one of
-        the configured nodes or the test name is unknown.
-        '''
-        node_index = self.get_current_node_index()
-        if node_index == -1:
-            print "Unknown host."
-            return
-
-        scheme = []
-        virtual_disk_partitions = get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
-        data_schemes = get_disk_partition_scheme(node_index, virtual_disk_partitions, self.base_paths)
-        link_base_schemes = get_cluster_link_scheme(len(self.nodes), partition, self.base_paths, self.DATA_LINKS_FOLDER + test)
-
-        # Match link paths to real data paths.
-        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-            # Prep
-            if test == "speed_up":
-                group_size = virtual_disk_partitions / (link_node + 1) / partition
-            elif test == "batch_scale_out":
-                group_size = virtual_disk_partitions / len(self.nodes) / partition
-            else:
-                print "Unknown test."
-                return
-
-            node_offset = group_size * node_index * partition
-            node_offset += group_size * link_index
-            # Link sets for a smaller node index than this host get no data.
-            has_data = True
-            if link_node < node_index:
-                has_data = False
-
-            # Make links
-            for data_node, data_disk, data_virtual, data_index, data_path in data_schemes:
-                if has_data and data_disk == link_disk \
-                        and node_offset <= data_index and data_index < node_offset + group_size:
-                    scheme.append([link_disk, data_index, link_index, data_path, link_path])
-            # Index -1 row: add_collection_links_for() only creates the folder for it.
-            scheme.append([link_disk, -1, link_index, "", link_path])
-        return scheme
-
-    def build_data_links(self, reset):
-        '''Create the symbolic link folders for every configured test.
-
-        reset -- when true, the links folder under the first base path is
-                 removed first (if it exists).
-        '''
-        if (len(self.base_paths) == 0):
-            return
-        if reset:
-            links_root = self.base_paths[0] + self.DATA_LINKS_FOLDER
-            # rmtree raises on a missing folder, e.g. on the very first run.
-            if os.path.isdir(links_root):
-                shutil.rmtree(links_root)
-        for test in self.dataset.get_tests():
-            if test in self.BENCHMARK_LOCAL_TESTS:
-                for i in self.partitions:
-                    scheme = self.get_local_partition_scheme(test, i)
-                    self.build_data_links_scheme(scheme)
-                if 1 in self.partitions and len(self.base_paths) > 1:
-                    scheme = self.build_data_links_local_zero_partition(test)
-                    self.build_data_links_scheme(scheme)
-            elif test in self.BENCHMARK_CLUSTER_TESTS:
-                for i in self.partitions:
-                    scheme = self.get_cluster_partition_scheme(test, i)
-                    self.build_data_links_scheme(scheme)
-                if 1 in self.partitions and len(self.base_paths) > 1:
-                    scheme = self.build_data_links_cluster_zero_partition(test)
-                    self.build_data_links_scheme(scheme)
-            else:
-                print "Unknown test."
-                exit()
-
-    def build_data_links_scheme(self, scheme):
-        '''Build all the data links based on the scheme information.'''
-        # get_cluster_partition_scheme() returns None for an unknown host or
-        # test; there is nothing to link in that case.
-        if not scheme:
-            return
-        for (data_disk, data_index, partition, data_path, link_path) in scheme:
-            self.add_collection_links_for(data_path, link_path, data_index)
-
-    def build_data_links_cluster_zero_partition(self, test):
-        '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
-        scheme = []
-        link_base_schemes = get_cluster_link_scheme(len(self.nodes), 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
-        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-            new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test + "/" + str(link_node) + "nodes")
-            # The one-partition link path is the data source; the disk number is the link index.
-            scheme.append([0, link_disk, 0, link_path, new_link_path])
-        return scheme
-
-    def build_data_links_local_zero_partition(self, test):
-        '''Build a scheme for all data in one symbolically linked folder. (0 partition)'''
-        scheme = []
-        index = 0
-        link_base_schemes = get_partition_scheme(0, 1, self.base_paths, self.DATA_LINKS_FOLDER + test)
-        for link_node, link_disk, link_virtual, link_index, link_path in link_base_schemes:
-            # local_batch_scale_out only uses the first link path.
-            if test == "local_batch_scale_out" and index > 0:
-                continue
-            new_link_path = self.get_zero_partition_path(link_node, self.DATA_LINKS_FOLDER + test)
-            scheme.append([0, index, 0, link_path, new_link_path])
-            index += 1
-        return scheme
-
-    def get_zero_partition_path(self, node, key):
-        '''Return a partition path for the zero partition.'''
-        base_path = self.base_paths[0]
-        new_link_path = get_partition_scheme(node, 1, [base_path], key)[0][PARTITION_INDEX_PATH]
-        # The one-partition path under the first base path, with "p1" renamed to "p0".
-        return new_link_path.replace("p1", "p0")
-
-    def get_current_node_index(self):
-        '''Return the index of the first node whose name is a prefix of this
-        machine's host name, or -1 when no node matches.'''
-        found = False
-        node_index = 0
-        for machine in self.nodes:
-            if socket.gethostname().startswith(machine.get_node_name()):
-                found = True
-                break
-            node_index += 1
-
-        if found:
-            return node_index
-        else:
-            return -1
-
-    def add_collection_links_for(self, real_path, link_path, index):
-        '''Create link_path/<collection>/ for each query collection and, when
-        index >= 0, (re)create the "index<N>" symbolic link inside it that
-        points at real_path/<collection>/.'''
-        for collection in self.QUERY_COLLECTIONS:
-            collection_path = link_path + collection + "/"
-            collection_index = collection_path + "index" + str(index)
-            if not os.path.isdir(collection_path):
-                os.makedirs(collection_path)
-            if index >= 0:
-                # Replace a link left over from an earlier run.
-                if os.path.islink(collection_index):
-                    os.unlink(collection_index)
-                os.symlink(real_path + collection + "/", collection_index)
-
-    def copy_query_files(self, reset):
-        '''Copy the query files for every configured test.'''
-        for test in self.dataset.get_tests():
-            if test in self.BENCHMARK_LOCAL_TESTS:
-                self.copy_local_query_files(test, reset)
-            elif test in self.BENCHMARK_CLUSTER_TESTS:
-                self.copy_cluster_query_files(test, reset)
-            else:
-                print "Unknown test."
-                exit()
-
-    def copy_cluster_query_files(self, test, reset):
-        '''Determine the data_link path for cluster query files and copy with
-        new location for collection.'''
-        if 1 in self.partitions and len(self.base_paths) > 1:
-            for n in range(len(self.nodes)):
-                query_path = get_cluster_query_path(self.base_paths, test, 0, n)
-                prepare_path(query_path, reset)
-
-                # Copy query files.
-                new_link_path = self.get_zero_partition_path(n, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
-                self.copy_and_replace_query(query_path, [new_link_path])
-        for n in range(len(self.nodes)):
-            for p in self.partitions:
-                query_path = get_cluster_query_path(self.base_paths, test, p, n)
-                prepare_path(query_path, reset)
-
-                # Copy query files.
-                partition_paths = get_disk_partition_paths(n, p, self.base_paths, self.DATA_LINKS_FOLDER + test + "/" + str(n) + "nodes")
-                self.copy_and_replace_query(query_path, partition_paths)
-
-    def copy_local_query_files(self, test, reset):
-        '''Determine the data_link path for local query files and copy with
-        new location for collection.'''
-        if 1 in self.partitions and len(self.base_paths) > 1:
-            query_path = get_local_query_path(self.base_paths, test, 0)
-            prepare_path(query_path, reset)
-
-            # Copy query files.
-            new_link_path = self.get_zero_partition_path(0, self.DATA_LINKS_FOLDER + test)
-            self.copy_and_replace_query(query_path, [new_link_path])
-        for p in self.partitions:
-            query_path = get_local_query_path(self.base_paths, test, p)
-            prepare_path(query_path, reset)
-
-            # Copy query files.
-            partition_paths = get_disk_partition_paths(0, p, self.base_paths, self.DATA_LINKS_FOLDER + test)
-            self.copy_and_replace_query(query_path, partition_paths)
-
-    def copy_and_replace_query(self, query_path, replacement_list):
-        '''Copy the query files over to the query_path and replace the path
-        for the where the collection data is located.'''
-        for query_file in self.QUERY_FILE_LIST + self.QUERY_UTILITY_LIST:
-            shutil.copyfile(self.QUERY_MASTER_FOLDER + query_file, query_path + query_file)
-
-            # Make a search replace for each collection.
-            for collection in self.QUERY_COLLECTIONS:
-                replacement_list_with_type = []
-                for replace in replacement_list:
-                    replacement_list_with_type.append(replace + collection)
-
-                replace_string = self.SEPERATOR.join(replacement_list_with_type)
-                # fileinput with inplace=True sends sys.stdout into the file,
-                # so each write below rewrites the line being read.
-                for line in fileinput.input(query_path + query_file, True):
-                    sys.stdout.write(line.replace(self.QUERY_REPLACEMENT_KEY + collection, replace_string))
-
-            # Make a search replace for partition type.
-            if self.dataset.get_partition_type() == "large_files":
-                for line in fileinput.input(query_path + query_file, True):
-                    sys.stdout.write(line.replace("/stationCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/stationCollection"))
-                for line in fileinput.input(query_path + query_file, True):
-                    sys.stdout.write(line.replace("/dataCollection", "/" + self.LARGE_FILE_ROOT_TAG + "/dataCollection"))
-
-    def get_number_of_slices_per_disk(self):
-        '''Return the virtual disk partition count of the first configured
-        test. Nothing is returned when no test is configured.'''
-        if len(self.dataset.get_tests()) == 0:
-            print "No test has been defined in config file."
-        else:
-            # Every branch returns or exits, so only the first test is examined.
-            for test in self.dataset.get_tests():
-                if test in self.BENCHMARK_LOCAL_TESTS:
-                    return get_local_virtual_disk_partitions(self.partitions)
-                elif test in self.BENCHMARK_CLUSTER_TESTS:
-                    return get_cluster_virtual_disk_partitions(self.nodes, self.partitions)
-                else:
-                    print "Unknown test."
-                    exit()
-
-def get_cluster_link_scheme(nodes, partition, base_paths, key="partitions"):
-    '''Concatenate the disk partition schemes of node indexes 0..nodes-1,
-    each generated under the key "<key>/<index>nodes".'''
-    collected = []
-    for node_id in range(nodes):
-        node_key = key + "/" + str(node_id) + "nodes"
-        collected.extend(get_disk_partition_scheme(node_id, partition, base_paths, node_key))
-    return collected
-
-def get_local_query_path(base_paths, test, partition):
-    '''Return "<first base path>queries/<test>/<d?_p?>/" for a local test.'''
-    queries_root = base_paths[0] + "queries/" + test + "/"
-    folder = get_local_query_folder(len(base_paths), partition)
-    return queries_root + folder + "/"
-
-def get_local_query_folder(disks, partitions):
-    '''Return the query folder name, e.g. "d2_p4" for 2 disks and 4 partitions.'''
-    disk_part = "d" + str(disks)
-    partition_part = "_p" + str(partitions)
-    return disk_part + partition_part
-
-def get_cluster_query_path(base_paths, test, partition, nodes):
-    '''Return "<first base path>queries/<test>/<nodes>nodes/<d?_p?>/".'''
-    nodes_root = base_paths[0] + "queries/" + test + "/" + str(nodes) + "nodes/"
-    folder = get_local_query_folder(len(base_paths), partition)
-    return nodes_root + folder + "/"
-
-def get_cluster_virtual_disk_partitions(nodes, partitions):
-    '''Return the local virtual partition count multiplied by the value
-    calculate_partitions() gives for the node counts 1..len(nodes).'''
-    local_count = get_local_virtual_disk_partitions(partitions)
-    node_counts = range(1, len(nodes) + 1)
-    return local_count * calculate_partitions(node_counts)
-
-def get_local_virtual_disk_partitions(partitions):
-    '''Return calculate_partitions() of the configured partition counts.'''
-    virtual_count = calculate_partitions(partitions)
-    return virtual_count
-
-def calculate_partitions(list):
-    '''Return a number that every value in the list divides evenly.
-
-    Starting from 1, a value that already divides the running result is
-    skipped; a value that is a multiple of the result replaces it; any
-    other value is multiplied in. The result is therefore a common
-    multiple, but not always the least one (e.g. [2, 3, 4] gives 24).
-    '''
-    common = 1
-    for count in list:
-        if common % count == 0:
-            continue
-        if count % common == 0:
-            common = count
-        else:
-            common = common * count
-    return common

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
deleted file mode 100644
index eeae25c..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_cli.py
+++ /dev/null
@@ -1,236 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-import sys, getopt
-
-# Custom modules.
-from weather_data_files import *
-from weather_download_files import *
-from weather_convert_to_xml import *
-from weather_config import *
-from weather_benchmark import *
-
-DEBUG_OUTPUT = False
-
-#
-# Weather conversion for GHCN-DAILY files to xml.
-#
-# http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/readme.txt
-#
-def main(argv):
-    append = False
-    max_records = 0
-    process_file_name = ""
-    reset = False
-    section = "all"
-    token = ""
-    update = False
-    xml_config_path = ""
-    
-    try:
-        opts, args = getopt.getopt(argv, "af:hl:m:ruvw:x:", ["file=", 
"locality=", "max_station_files=", "web_service=", "xml_config="])
-    except getopt.GetoptError:
-        print 'The file options for weather_cli.py were not correctly 
specified.'
-        print 'To see a full list of options try:'
-        print '  $ python weather_cli.py -h'
-        sys.exit(2)
-    for opt, arg in opts:
-        if opt == '-h':
-            print 'Converting weather daily files to xml options:'
-            print '    -a        Append the results to the progress file.'
-            print '    -f (str)  The file name of a specific station to 
process.'
-            print '              * Helpful when testing a single stations XML 
file output.'
-            print '    -l (str)  Select the locality of the scripts execution 
(download, progress_file, sensor_build, station_build, partition, 
partition_scheme, test_links, queries, inventory, statistics).'
-            print '    -m (int)  Limits the number of files created for each 
station.'
-            print '              * Helpful when testing to make sure all 
elements are supported for each station.'
-            print '              Alternate form: --max_station_files=(int)'
-            print '    -r        Reset the build process. (For one section or 
all sections depending on other parameters.)'
-            print '    -u        Recalculate the file count and data size for 
each data source file.'
-            print '    -v        Extra debug information.'
-            print '    -w (str)  Downloads the station XML file form the web 
service.'
-            print '    -x (str)  XML config file for weather data.'
-            sys.exit()
-        elif opt in ('-a', "--append"):
-            append = True
-        elif opt in ('-f', "--file"):
-            # check if file exists.
-            if os.path.exists(arg):
-                process_file_name = arg
-            else:
-                print 'Error: Argument must be a file name for --file (-f).'
-                sys.exit()
-        elif opt in ('-l', "--locality"):
-            if arg in ("download", "progress_file", "sensor_build", 
"station_build", "partition", "partition_scheme", "test_links", "queries", 
"inventory", "statistics"):
-                section = arg
-            else:
-                print 'Error: Argument must be a string for --locality (-l) 
and a valid locality.'
-                sys.exit()
-        elif opt in ('-m', "--max_station_files"):
-            if arg.isdigit():
-                max_records = int(arg)
-            else:
-                print 'Error: Argument must be an integer for 
--max_station_files (-m).'
-                sys.exit()
-        elif opt == '-r':
-            reset = True
-        elif opt == '-u':
-            update = True
-        elif opt == '-v':
-            global DEBUG_OUTPUT
-            DEBUG_OUTPUT = True
-        elif opt == '-w':
-            # check if file exists.
-            if arg is not "":
-                token = arg
-            else:
-                print 'Error: Argument must be a string --web_service (-w).'
-                sys.exit()
-        elif opt in ('-x', "--xml_config"):
-            # check if file exists.
-            if os.path.exists(arg):
-                xml_config_path = arg
-            else:
-                print 'Error: Argument must be a xml file for --xml_config 
(-x).'
-                sys.exit()
-
-    # Required fields to run the script.
-    if xml_config_path == "" or not os.path.exists(xml_config_path):
-        print 'Error: The xml config option must be supplied: --xml_config 
(-x).'
-        sys.exit()
-    config = WeatherConfig(xml_config_path)
-    
-    # Required fields to run the script.
-    if config.get_save_path() == "" or not 
os.path.exists(config.get_save_path()):
-        print 'Error: The save directory option must be supplied in the config 
file.'
-        sys.exit()
-
-    # Set up downloads folder.
-    download_path = config.get_save_path() + "/downloads"
-    if section in ("all", "download"):
-        print 'Processing the download section.'
-        download = WeatherDownloadFiles(download_path)
-        download.download_ghcnd_files(reset)
-        download.download_mshr_files(reset)
-
-        # Unzip the required file.
-        download.unzip_ghcnd_package(config.get_package(), reset)
-        download.unzip_mshr_files(reset)
-
-
-    # Create some basic paths for save files and references.
-    ghcnd_data_dly_path = download_path + '/' + config.get_package() + '/' + 
config.get_package()
-    xml_data_save_path = config.get_save_path() + '/all_xml_files/'
-
-    # Make sure the xml folder is available.
-    if not os.path.isdir(xml_data_save_path):
-        os.makedirs(xml_data_save_path)
-
-    # Set up the XML build objects.
-    convert = WeatherWebServiceMonthlyXMLFile(download_path, 
xml_data_save_path, DEBUG_OUTPUT)
-    progress_file = xml_data_save_path + "_data_progress.csv"
-    data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
-    if section in ("all", "progress_file"):
-        print 'Processing the progress_file section.'
-        options = list()
-        if append:
-            options.append('append')
-        if update:
-            options.append('recalculate')
-        if reset:
-            options.append('reset')
-        data.build_progress_file(options, convert)
-    
-    if section in ("all", "sensor_build"):
-        print 'Processing the sensor_build section.'
-        if process_file_name is not "":
-            # process a single file
-            if os.path.exists(process_file_name):
-                (file_count, data_size) = 
convert.process_sensor_file(process_file_name, max_records, 4)
-                data.update_file_sensor_status(process_file_name, 
WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
-            else:
-                data.update_file_sensor_status(process_file_name, 
WeatherDataFiles.DATA_FILE_MISSING)
-        else:
-            # process directory
-            data.reset()
-            data.set_type("sensor")
-            data.set_data_reset(reset)
-            for file_name in data:
-                file_path = ghcnd_data_dly_path + '/' + file_name
-                if os.path.exists(file_path):
-                    (file_count, data_size) = 
convert.process_sensor_file(file_path, max_records, 4)
-                    data.update_file_sensor_status(file_name, 
WeatherDataFiles.DATA_FILE_GENERATED, file_count, data_size)
-                else:
-                    data.update_file_sensor_status(file_name, 
WeatherDataFiles.DATA_FILE_MISSING)
-                
-    if section in ("all", "station_build"):
-        print 'Processing the station_build section.'
-        data.reset()
-        data.set_type("station")
-        data.set_data_reset(reset)
-        if token is not "":
-            convert.set_token(token)
-        for file_name in data: 
-            file_path = ghcnd_data_dly_path + '/' + file_name
-            if os.path.exists(file_path):
-                return_status = convert.process_station_file(file_path)
-                status = data.get_station_status(return_status)
-                data.update_file_station_status(file_name, status)
-            else:
-                data.update_file_station_status(file_name, 
WeatherDataFiles.DATA_FILE_MISSING)
-                    
-    for dataset in config.get_dataset_list():
-        # Set up the setting for each dataset.
-        dataset_folder = "/dataset-" + dataset.get_name()
-        progress_file = config.get_save_path() + dataset_folder + 
"/_data_progress.csv"
-        data = WeatherDataFiles(ghcnd_data_dly_path, progress_file)
-
-        base_paths = []
-        for paths in dataset.get_save_paths():
-            base_paths.append(paths + dataset_folder + "/")
-        benchmark = WeatherBenchmark(base_paths, dataset.get_partitions(), 
dataset, config.get_node_machine_list())
-        
-        if section in ("all", "partition", "partition_scheme"):
-            slices = benchmark.get_number_of_slices_per_disk()
-            print 'Processing the partition section (' + dataset.get_name() + 
':d' + str(len(base_paths)) + ':s' + str(slices) + ').'
-            data.reset()
-            if section == "partition_scheme":
-                benchmark.print_partition_scheme()
-            else:
-                if dataset.get_partition_type() == "large_files":
-                    data.build_to_n_partition_files(xml_data_save_path, 
slices, base_paths, reset)
-                else:
-                    data.copy_to_n_partitions(xml_data_save_path, slices, 
base_paths, reset)
-    
-        if section in ("all", "test_links"):
-            # TODO determine current node 
-            print 'Processing the test links section (' + dataset.get_name() + 
').'
-            benchmark.print_partition_scheme()
-            benchmark.build_data_links(reset)
-
-        if section in ("all", "queries"):
-            print 'Processing the queries section (' + dataset.get_name() + 
').'
-            benchmark.copy_query_files(reset)
-    
-    if section in ("inventory"):
-        print 'Processing the inventory section.'
-        convert.process_inventory_file()
-                  
-#     if section in ("statistics"):
-#         print 'Processing the statistics section.'
-#         data.print_progress_file_stats(convert)
-                  
-if __name__ == "__main__":
-    main(sys.argv[1:])

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
deleted file mode 100644
index 80607b8..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config.py
+++ /dev/null
@@ -1,134 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-from xml.dom.minidom import parse
-
-class WeatherConfig:
-    def __init__(self, config_xml_file):
-        self.config_xml_file = config_xml_file
-        
-        self.config = parse(self.config_xml_file)
-
-    def get_save_path(self):
-        return self.get_text(self.config.getElementsByTagName("save_path")[0])
-
-    def get_package(self):
-        return self.get_text(self.config.getElementsByTagName("package")[0])
-
-    def get_node_machine_list(self):
-        nodes = []
-        for node in self.config.getElementsByTagName("node"):
-            id = self.get_node_name(node)
-            ip = self.get_node_ip(node)
-            nodes.append(Machine(id, ip))
-        return nodes
-
-    def get_dataset_list(self):
-        nodes = []
-        for node in self.config.getElementsByTagName("dataset"):
-            name = self.get_dataset_name(node)
-            save_paths = self.get_dataset_save_paths(node)
-            partition_type = self.get_dataset_partition_type(node)
-            partitions = self.get_dataset_partitions(node)
-            tests = self.get_dataset_tests(node)
-            nodes.append(Dataset(name, save_paths, partition_type, partitions, 
tests))
-        return nodes
-
-
-    # 
--------------------------------------------------------------------------
-    # Node Specific Functions
-    # 
--------------------------------------------------------------------------
-    def get_node_ip(self, node):
-        return self.get_text(node.getElementsByTagName("cluster_ip")[0])
-
-    def get_node_name(self, node):
-        return self.get_text(node.getElementsByTagName("id")[0])
-
-    
-    # 
--------------------------------------------------------------------------
-    # Dataset Specific Functions
-    # 
--------------------------------------------------------------------------
-    def get_dataset_name(self, node):
-        return self.get_text(node.getElementsByTagName("name")[0])
-
-    def get_dataset_save_paths(self, node):
-        paths = []
-        for item in node.getElementsByTagName("save_path"):
-            paths.append(self.get_text(item))
-        return paths
-
-    def get_dataset_partition_type(self, node):
-        return self.get_text(node.getElementsByTagName("partition_type")[0])
-
-    def get_dataset_partitions(self, node):
-        paths = []
-        for item in node.getElementsByTagName("partitions_per_path"):
-            paths.append(int(self.get_text(item)))
-        return paths
-
-    def get_dataset_tests(self, node):
-        tests = []
-        for item in node.getElementsByTagName("test"):
-            tests.append(self.get_text(item))
-        return tests
-
-    def get_text(self, xml_node):
-        rc = []
-        for node in xml_node.childNodes:
-            if node.nodeType == node.TEXT_NODE:
-                rc.append(node.data)
-        return ''.join(rc)
-
-class Machine:
-    def __init__(self, id, ip):
-        self.id = id
-        self.ip = ip
-    
-    def get_node_name(self):
-        return self.id
-    
-    def get_node_ip(self):
-        return self.ip
-    
-    def __repr__(self):
-        return self.id + "(" + self.ip + ")"
-    
-class Dataset:
-    def __init__(self, name, save_paths, partition_type, partitions, tests):
-        self.name = name
-        self.save_paths = save_paths
-        self.partitions = partitions
-        self.partition_type = partition_type
-        self.tests = tests
-    
-    def get_name(self):
-        return self.name
-    
-    def get_save_paths(self):
-        return self.save_paths
-    
-    def get_partitions(self):
-        return self.partitions
-    
-    def get_partition_type(self):
-        return self.partition_type
-    
-    def get_tests(self):
-        return self.tests
-    
-    def __repr__(self):
-        return self.name + ":" + str(self.save_paths) + ":" + 
str(self.partitions)
-    

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
deleted file mode 100644
index 04fff52..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_ghcnd.py
+++ /dev/null
@@ -1,95 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Base URL used to get all the required files.
-BASE_DOWNLOAD_URL = 'http://www1.ncdc.noaa.gov/pub/data/ghcn/daily/'
-
-# List of required files for a build.
-FILE_NAMES = []
-FILE_NAMES.append('ghcnd-countries.txt')
-FILE_NAMES.append('ghcnd-inventory.txt')
-FILE_NAMES.append('ghcnd-states.txt')
-FILE_NAMES.append('ghcnd-stations.txt')
-FILE_NAMES.append('ghcnd-version.txt')
-FILE_NAMES.append('ghcnd_all.tar.gz')
-FILE_NAMES.append('ghcnd_gsn.tar.gz')
-FILE_NAMES.append('ghcnd_hcn.tar.gz')
-FILE_NAMES.append('readme.txt')
-FILE_NAMES.append('status.txt')
-
-# Store the row details here.
-
-# Index values of each field details.
-FIELD_INDEX_NAME = 0
-FIELD_INDEX_START = 1
-FIELD_INDEX_END = 2
-FIELD_INDEX_TYPE = 3
-
-DLY_FIELD_ID = 0
-DLY_FIELD_YEAR = 1
-DLY_FIELD_MONTH = 2
-DLY_FIELD_ELEMENT = 3
-
-DLY_FIELD_DAY_OFFSET = 4
-DLY_FIELD_DAY_FIELDS = 4
-
-DLY_FIELDS = []
-
-# Details about the row.
-DLY_FIELDS.append(['ID', 1, 11, 'Character'])
-DLY_FIELDS.append(['YEAR', 12, 15, 'Integer'])
-DLY_FIELDS.append(['MONTH', 16, 17, 'Integer'])
-DLY_FIELDS.append(['ELEMENT', 18, 21, 'Character'])
-
-# Days in each row.
-for i in range(1, 32):
-    start = 22 + ((i - 1) * 8)
-    DLY_FIELDS.append(['VALUE' + str(i), (start + 0), (start + 4), 'Integer'])
-    DLY_FIELDS.append(['MFLAG' + str(i), (start + 5), (start + 5), 
'Character'])
-    DLY_FIELDS.append(['QFLAG' + str(i), (start + 6), (start + 6), 
'Character'])
-    DLY_FIELDS.append(['SFLAG' + str(i), (start + 7), (start + 7), 
'Character'])
-
-# Details about the row.
-STATIONS_FIELDS = {}
-STATIONS_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-STATIONS_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-STATIONS_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-STATIONS_FIELDS['ELEVATION'] = ['ELEVATION', 32, 37, 'Real']
-STATIONS_FIELDS['STATE'] = ['STATE', 39, 40, 'Character']
-STATIONS_FIELDS['NAME'] = ['NAME', 42, 71, 'Character']
-STATIONS_FIELDS['GSNFLAG'] = ['GSNFLAG', 73, 75, 'Character']
-STATIONS_FIELDS['HCNFLAG'] = ['HCNFLAG', 77, 79, 'Character']
-STATIONS_FIELDS['WMOID'] = ['WMOID', 81, 85, 'Character']
-
-# Details about the row.
-COUNTRIES_FIELDS = {}
-COUNTRIES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-COUNTRIES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-STATES_FIELDS = {}
-STATES_FIELDS['CODE'] = ['CODE', 1, 2, 'Character']
-STATES_FIELDS['NAME'] = ['NAME', 4, 50, 'Character']
-
-# Details about the row.
-INVENTORY_FIELDS = {}
-INVENTORY_FIELDS['ID'] = ['ID', 1, 11, 'Character']
-INVENTORY_FIELDS['LATITUDE'] = ['LATITUDE', 13, 20, 'Real']
-INVENTORY_FIELDS['LONGITUDE'] = ['LONGITUDE', 22, 30, 'Real']
-INVENTORY_FIELDS['ELEMENT'] = ['ELEMENT', 32, 35, 'Character']
-INVENTORY_FIELDS['FIRSTYEAR'] = ['FIRSTYEAR', 37, 40, 'Integer']
-INVENTORY_FIELDS['LASTYEAR'] = ['LASTYEAR', 42, 45, 'Integer']

http://git-wip-us.apache.org/repos/asf/vxquery/blob/3167366d/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
deleted file mode 100644
index 7b1434f..0000000
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_config_mshr.py
+++ /dev/null
@@ -1,78 +0,0 @@
-#!/usr/bin/env python
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# List of required files for a build.
-MSHR_URLS = []
-MSHR_URLS.append('ftp://ftp.ncdc.noaa.gov/pub/data/homr/docs/MSHR_Enhanced_Table.txt')
-MSHR_URLS.append('http://www.ncdc.noaa.gov/homr/file/mshr_enhanced.txt.zip')
-
-# Index values of each field details.
-MSHR_FIELD_INDEX_NAME = 0
-MSHR_FIELD_INDEX_START = 1
-MSHR_FIELD_INDEX_END = 2
-MSHR_FIELD_INDEX_TYPE = 3
-
-# Store the row details here.
-MSHR_FIELDS = {}
-
-# Details about the row.
-MSHR_FIELDS['SOURCE_ID'] = ['SOURCE_ID', 1, 20, 'X(20)']
-MSHR_FIELDS['SOURCE'] = ['SOURCE', 22, 31, 'X(10)']
-MSHR_FIELDS['BEGIN_DATE'] = ['BEGIN_DATE', 33, 40, 'YYYYMMDD']
-MSHR_FIELDS['END_DATE'] = ['END_DATE', 42, 49, 'YYYYMMDD']
-MSHR_FIELDS['STATION_STATUS'] = ['STATION_STATUS', 51, 70, 'X(20)']
-MSHR_FIELDS['NCDCSTN_ID'] = ['NCDCSTN_ID', 72, 91, 'X(20)']
-MSHR_FIELDS['ICAO_ID'] = ['ICAO_ID', 93, 112, 'X(20)']
-MSHR_FIELDS['WBAN_ID'] = ['WBAN_ID', 114, 133, 'X(20)']
-MSHR_FIELDS['FAA_ID'] = ['FAA_ID', 135, 154, 'X(20)']
-MSHR_FIELDS['NWSLI_ID'] = ['NWSLI_ID', 156, 175, 'X(20)']
-MSHR_FIELDS['WMO_ID'] = ['WMO_ID', 177, 196, 'X(20)']
-MSHR_FIELDS['COOP_ID'] = ['COOP_ID', 198, 217, 'X(20)']
-MSHR_FIELDS['TRANSMITTAL_ID'] = ['TRANSMITTAL_ID', 219, 238, 'X(20)']
-MSHR_FIELDS['GHCND_ID'] = ['GHCND_ID', 240, 259, 'X(20)']
-MSHR_FIELDS['NAME_PRINCIPAL'] = ['NAME_PRINCIPAL', 261, 360, 'X(100)']
-MSHR_FIELDS['NAME_PRINCIPAL_SHORT'] = ['NAME_PRINCIPAL_SHORT', 362, 391, 
'X(30)']
-MSHR_FIELDS['NAME_COOP'] = ['NAME_COOP', 393, 492, 'X(100)']
-MSHR_FIELDS['NAME_COOP_SHORT'] = ['NAME_COOP_SHORT', 494, 523, 'X(30)']
-MSHR_FIELDS['NAME_PUBLICATION'] = ['NAME_PUBLICATION', 525, 624, 'X(100)']
-MSHR_FIELDS['NAME_ALIAS'] = ['NAME_ALIAS', 626, 725, 'X(100)']
-MSHR_FIELDS['NWS_CLIM_DIV'] = ['NWS_CLIM_DIV', 727, 736, 'X(10)']
-MSHR_FIELDS['NWS_CLIM_DIV_NAME'] = ['NWS_CLIM_DIV_NAME', 738, 777, 'X(40)']
-MSHR_FIELDS['STATE_PROV'] = ['STATE_PROV', 779, 788, 'X(10)']
-MSHR_FIELDS['COUNTY'] = ['COUNTY', 790, 839, 'X(50)']
-MSHR_FIELDS['NWS_ST_CODE'] = ['NWS_ST_CODE', 841, 842, 'X(2)']
-MSHR_FIELDS['FIPS_COUNTRY_CODE'] = ['FIPS_COUNTRY_CODE', 844, 845, 'X(2)']
-MSHR_FIELDS['FIPS_COUNTRY_NAME'] = ['FIPS_COUNTRY_NAME', 847, 946, 'X(100)']
-MSHR_FIELDS['NWS_REGION'] = ['NWS_REGION', 948, 977, 'X(30)']
-MSHR_FIELDS['NWS_WFO'] = ['NWS_WFO', 979, 988, 'X(10)']
-MSHR_FIELDS['ELEV_GROUND'] = ['ELEV_GROUND', 990, 1029, 'X(40)']
-MSHR_FIELDS['ELEV_GROUND_UNIT'] = ['ELEV_GROUND_UNIT', 1031, 1050, 'X(20)']
-MSHR_FIELDS['ELEV_BAROM'] = ['ELEV_BAROM', 1052, 1091, 'X(40)']
-MSHR_FIELDS['ELEV_BAROM_UNIT'] = ['ELEV_BAROM_UNIT', 1093, 1112, 'X(20)']
-MSHR_FIELDS['ELEV_AIR'] = ['ELEV_AIR', 1114, 1153, 'X(40)']
-MSHR_FIELDS['ELEV_AIR_UNIT'] = ['ELEV_AIR_UNIT', 1155, 1174, 'X(20)']
-MSHR_FIELDS['ELEV_ZERODAT'] = ['ELEV_ZERODAT', 1176, 1215, 'X(40)']
-MSHR_FIELDS['ELEV_ZERODAT_UNIT'] = ['ELEV_ZERODAT_UNIT', 1217, 1236, 'X(20)']
-MSHR_FIELDS['ELEV_UNK'] = ['ELEV_UNK', 1238, 1277, 'X(40)']
-MSHR_FIELDS['ELEV_UNK_UNIT'] = ['ELEV_UNK_UNIT', 1279, 1298, 'X(20)']
-MSHR_FIELDS['LAT_DEC'] = ['LAT_DEC', 1300, 1319, 'X(20)']
-MSHR_FIELDS['LON_DEC'] = ['LON_DEC', 1321, 1340, 'X(20)']
-MSHR_FIELDS['LAT_LON_PRECISION'] = ['LAT_LON_PRECISION', 1342, 1351, 'X(10)']
-MSHR_FIELDS['RELOCATION'] = ['RELOCATION', 1353, 1414, 'X(62)']
-MSHR_FIELDS['UTC_OFFSET'] = ['UTC_OFFSET', 1416, 1431, '9(16)']
-MSHR_FIELDS['OBS_ENV'] = ['OBS_ENV', 1433, 1472, 'X(40) ']
-MSHR_FIELDS['PLATFORM'] = ['PLATFORM', 1474, 1573, 'X(100)']

Reply via email to