Recent round of changes to improve performance in the benchmarks.

- Added more queries to help identify root causes of performance issues.
- The join hash size can now exceed 2G (the size is now a long instead of an int).
- Cache dictionary for node trees.
- In child unnesting, the filter is now cached between calls.
- Created a string builder to help with parsing. (Parsing now skips the 
conversion to a string.)
- Added cluster controller (CC) and node controller (NC) startup options in a new cluster.properties file.


Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo
Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/b665db73
Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/b665db73
Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/b665db73

Branch: refs/heads/prestonc/november_update
Commit: b665db73434fa4d4f787258981e16c2e7e5a4fc4
Parents: 99ba4db
Author: Preston Carman <[email protected]>
Authored: Thu Dec 18 09:58:23 2014 -0800
Committer: Preston Carman <[email protected]>
Committed: Thu Dec 18 09:58:23 2014 -0800

----------------------------------------------------------------------
 .../other_systems/mrql/q07_count_1940.mrql      |  30 ++
 .../other_systems/mrql/q07_count_1960.mrql      |  30 ++
 .../other_systems/mrql/q07_count_1980.mrql      |  30 ++
 .../other_systems/mrql/q07_count_2000.mrql      |  30 ++
 .../other_systems/mrql/q07_count_join.mrql      |   6 +-
 .../other_systems/mrql/q07_count_left.mrql      |  21 ++
 .../other_systems/mrql/q07_count_tmax.mrql      |   2 +-
 .../other_systems/mrql/q07_count_tmin.mrql      |   2 +-
 .../other_systems/mrql/q07_data_tmax.mrql       |  20 ++
 .../other_systems/mrql/q07_data_tmin.mrql       |  20 ++
 .../other_systems/mrql/q07_filter_1940.mrql     |  30 ++
 .../other_systems/mrql/q07_filter_1960.mrql     |  30 ++
 .../other_systems/mrql/q07_filter_1980.mrql     |  30 ++
 .../other_systems/mrql/q07_filter_2000.mrql     |  30 ++
 .../mrql_scripts/run_group_test.sh              |   4 +-
 .../noaa-ghcn-daily/queries/q07_count_1940.xq   |  35 +++
 .../noaa-ghcn-daily/queries/q07_count_1960.xq   |  35 +++
 .../noaa-ghcn-daily/queries/q07_count_1980.xq   |  35 +++
 .../noaa-ghcn-daily/queries/q07_count_2000.xq   |  35 +++
 .../noaa-ghcn-daily/queries/q07_count_left.xq   |  27 ++
 .../noaa-ghcn-daily/queries/q07_data_tmax.xq    |  26 ++
 .../noaa-ghcn-daily/queries/q07_data_tmin.xq    |  26 ++
 .../noaa-ghcn-daily/queries/q07_filter_1940.xq  |  35 +++
 .../noaa-ghcn-daily/queries/q07_filter_1960.xq  |  35 +++
 .../noaa-ghcn-daily/queries/q07_filter_1980.xq  |  35 +++
 .../noaa-ghcn-daily/queries/q07_filter_2000.xq  |  35 +++
 .../noaa-ghcn-daily/scripts/run_benchmark.sh    |   4 +-
 .../scripts/run_benchmark_cluster.sh            |   9 +-
 .../scripts/testing_logging.properties          |  10 +-
 .../scripts/weather_benchmark.py                |  13 +-
 .../java/org/apache/vxquery/cli/VXQuery.java    |   3 +-
 .../builders/nodes/DictionaryBuilder.java       |  21 +-
 .../builders/nodes/UTF8StringBuilder.java       |  53 ++++
 .../functions/step/ChildPathStepUnnesting.java  |  10 +-
 .../runtime/functions/util/FunctionHelper.java  |   6 +
 .../vxquery/xmlparser/SAXContentHandler.java    |  78 ++---
 .../xmlquery/query/XMLQueryCompiler.java        |   8 +-
 vxquery-server/pom.xml                          | 305 ++++++++++---------
 .../src/main/resources/conf/cluster.properties  |  54 ++++
 .../src/main/resources/scripts/startcc.sh       |  21 +-
 .../src/main/resources/scripts/startnc.sh       |  19 +-
 .../src/main/resources/scripts/stopcc.sh        |   4 +
 .../src/main/resources/scripts/stopcluster.sh   |  12 +-
 .../src/main/resources/scripts/stopnc.sh        |   4 +
 44 files changed, 1097 insertions(+), 211 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql
new file mode 100644
index 0000000..268f211
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 1940
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 1940
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql
new file mode 100644
index 0000000..268f211
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 1960
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 1960
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql
new file mode 100644
index 0000000..2921cf6
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 1980
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 1980
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql
new file mode 100644
index 0000000..8bfcb63
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 2000
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 2000
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
index 8dec470..c48ae67 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql
@@ -17,10 +17,10 @@
 count(
     select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
     from rtmax in source(xml, args[0], {"data"}),
-        rtmin in source(xml, args[0], {"data"})
+        rtmin in source(xml, args[2], {"data"})
     where text(rtmax.date) = text(rtmin.date)
         and text(rtmax.station) = text(rtmin.station)
-        and text(r.dataType) = "TMAX"
-        and text(r.dataType) = "TMIN"
+        and text(rtmax.dataType) = "TMAX"
+        and text(rtmin.dataType) = "TMIN"
 )
 ;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql
new file mode 100644
index 0000000..c4325f3
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+count(
+    select (rtmin)
+    from rtmin in source(xml, args[0], {"data"})
+)
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
index ca8ab4c..49cca89 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql
@@ -17,6 +17,6 @@
 count(
     select (rtmax)
     from rtmax in source(xml, args[0], {"data"})
-    where text(r.dataType) = "TMAX"
+    where text(rtmax.dataType) = "TMAX"
 )
 ;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
index fe17ebe..00ae56f 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql
@@ -17,6 +17,6 @@
 count(
     select (rtmin)
     from rtmin in source(xml, args[0], {"data"})
-    where text(r.dataType) = "TMIN"
+    where text(rtmin.dataType) = "TMIN"
 )
 ;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql
new file mode 100644
index 0000000..374a0e3
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+    select (rtmax)
+    from rtmax in source(xml, args[0], {"data"})
+    where text(rtmax.dataType) = "TMAX"
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql
new file mode 100644
index 0000000..10e2fe9
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql
@@ -0,0 +1,20 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+    select (rtmin)
+    from rtmin in source(xml, args[0], {"data"})
+    where text(rtmin.dataType) = "TMIN"
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql
new file mode 100644
index 0000000..54699fc
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+avg(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 1940
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 1940
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql
new file mode 100644
index 0000000..54699fc
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+avg(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 1960
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 1960
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql
new file mode 100644
index 0000000..bff559b
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+avg(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 1980
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 1980
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql
new file mode 100644
index 0000000..052a0c9
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql
@@ -0,0 +1,30 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+avg(
+    select (toInt(text(rtmax.value))-toInt(text(rtmin.value)))
+    from rtmax in source(xml, args[0], {"data"}),
+        dtmax in rtmax.date,
+        rtmin in source(xml, args[2], {"data"}),
+        dtmin in rtmin.date
+    where text(rtmax.date) = text(rtmin.date)
+        and text(rtmax.station) = text(rtmin.station)
+        and text(rtmax.dataType) = "TMAX"
+        and toInt(substring(text(dtmax), 0, 4)) > 2000
+        and text(rtmin.dataType) = "TMIN"
+        and toInt(substring(text(dtmin), 0, 4)) > 2000
+) / 10
+;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
index 134c05e..84028e5 100755
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh
@@ -32,8 +32,8 @@ fi
 DATASET=${1}
 NODES=${2}
 REPEAT=1
-#DATA_FILES=${NODES}
-DATA_FILES=8
+DATA_FILES=${NODES}
+#DATA_FILES=8
 
 # Start Hadoop
 sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq
new file mode 100644
index 0000000..c70b4d2
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and count the matching pairs.                               :)
+fn:count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1940
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1940
+    return $r_max/value - $r_min/value
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq
new file mode 100644
index 0000000..26b08f9
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and count the matching pairs.                               :)
+fn:count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1960
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1960
+    return $r_max/value - $r_min/value
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq
new file mode 100644
index 0000000..daa760b
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and count the results.                                      :)
+fn:count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1980
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1980
+    return $r_max/value - $r_min/value
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq
new file mode 100644
index 0000000..b905807
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and count the results.                                      :)
+fn:count(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 2000
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 2000
+    return $r_max/value - $r_min/value
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq
new file mode 100644
index 0000000..0639ff4
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq
@@ -0,0 +1,27 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Count all the data records (no dataType filter is applied).
+:)
+fn:count(
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    return $r_max
+)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq
new file mode 100644
index 0000000..e511918
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq
@@ -0,0 +1,26 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Find all the records for TMAX.
+:)
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    where $r_max/dataType eq "TMAX"
+    return $r_max

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq
new file mode 100644
index 0000000..579859f
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq
@@ -0,0 +1,26 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(:
+XQuery Join Query
+-------------------
+Find all the records for TMIN.
+:)
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    where $r_min/dataType eq "TMIN"
+    return $r_min

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq
new file mode 100644
index 0000000..e122494
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and get the average.                                        :)
+fn:avg(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1940
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1940
+    return $r_max/value - $r_min/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq
new file mode 100644
index 0000000..7888560
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and get the average.                                        :)
+fn:avg(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1960
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1960
+    return $r_max/value - $r_min/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq
new file mode 100644
index 0000000..0b0cbd3
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and get the average.                                        :)
+fn:avg(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1980
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1980
+    return $r_max/value - $r_min/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq
new file mode 100644
index 0000000..b343bd0
--- /dev/null
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq
@@ -0,0 +1,35 @@
+(: Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+   
+     http://www.apache.org/licenses/LICENSE-2.0
+   
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License. :)
+
+(: XQuery Self Join Query :)
+(: Self join with all stations finding the difference in min and max       :)
+(: temperature and get the average.                                        :)
+fn:avg(
+    let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_min in collection($sensor_collection_min)/dataCollection/data
+    
+    let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors"
+    for $r_max in collection($sensor_collection_max)/dataCollection/data
+    
+    where $r_min/station eq $r_max/station
+        and $r_min/date eq $r_max/date
+        and $r_min/dataType eq "TMIN"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 2000
+        and $r_max/dataType eq "TMAX"
+        and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 2000
+    return $r_max/value - $r_min/value
+) div 10
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
index 8bc6772..c4d8922 100755
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh
@@ -29,7 +29,7 @@ REPEAT=5
 IGNORE=2
 FRAME_SIZE=$((8*1024))
 BUFFER_SIZE=$((32*1024*1024))
-JOIN_HASH_SIZE=$(( 4 * (64*1024*1024) ))
+JOIN_HASH_SIZE=$(( 6 * (1024*1024*1024) / 8 ))
 
 if [ -z "${1}" ]
 then
@@ -37,7 +37,7 @@ then
     exit
 fi
 
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError 
-Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx7G 
-Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties"
 
 for j in $(find ${1} -name '*q??.xq')
 do

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
index c90a7a9..7a0c7a9 100755
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh
@@ -25,11 +25,12 @@
 # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ 
"-client-net-ip-address 169.235.27.138"
 # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03
 #
-CLUSTER="uci"
-REPEAT=5
+CLUSTER="rita"
+REPEAT=1
 FRAME_SIZE=$((8*1024))
 BUFFER_SIZE=$((32*1024*1024))
-JOIN_HASH_SIZE=$((4*4*64*1024*1024))
+JOIN_HASH_SIZE=$((1024*1024*1024))
+#JOIN_HASH_SIZE=-1
 
 if [ -z "${1}" ]
 then
@@ -50,7 +51,7 @@ python 
vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-serve
 # wait for cluster to finish setting up  
 sleep 5
 
-export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError 
-Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties"
+export JAVA_OPTS="$JAVA_OPTS -server -Xmx7G 
-Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties"
 
 for j in $(find ${1} -name '*q??.xq')
 do

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties
index ff877dd..672e456 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties
@@ -43,9 +43,9 @@ handlers= java.util.logging.ConsoleHandler
 # Note that the ConsoleHandler also has a separate level
 # setting to limit messages printed to the console.
 
-#.level= WARNING
-# .level= INFO
-.level= FINE
+# .level= WARNING
+ .level= INFO
+# .level= FINE
 # .level = FINEST
 
 ############################################################
@@ -75,5 +75,5 @@ java.util.logging.ConsoleHandler.formatter = 
java.util.logging.SimpleFormatter
 # messages:
 
 # edu.uci.ics.asterix.level = FINE
-edu.uci.ics.algebricks.level = FINE
-# edu.uci.ics.hyracks.level = FINE
+# edu.uci.ics.hyracks.algebricks.level = FINE
+edu.uci.ics.hyracks.level = FINE

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
----------------------------------------------------------------------
diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
index 746fef4..fbd7b04 100644
--- 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
+++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py
@@ -59,9 +59,20 @@ class WeatherBenchmark:
                           "q06_count_join.xq",
                           "q06_count_sensor.xq",
                           "q06_count_station.xq",
+                          "q07_count_1940.xq",
+                          "q07_count_1960.xq",
+                          "q07_count_1980.xq",
+                          "q07_count_2000.xq",
                           "q07_count_join.xq",
+                          "q07_count_left.xq",
                           "q07_count_tmin.xq",
                           "q07_count_tmax.xq",
+                          "q07_data_tmin.xq",
+                          "q07_data_tmax.xq",
+                          "q07_filter_1940.xq",
+                          "q07_filter_1960.xq",
+                          "q07_filter_1980.xq",
+                          "q07_filter_2000.xq",
                           ] 
     BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] 
     BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] 
@@ -364,7 +375,7 @@ def get_cluster_query_path(base_paths, test, partition, 
nodes):
 
 def get_cluster_virtual_disk_partitions(nodes, partitions):
     vp = get_local_virtual_disk_partitions(partitions)
-    vn = calculate_partitions(range(1, len(nodes)+1, 1))
+    vn = calculate_partitions(range(1, len(nodes) + 1, 1))
     return vp * vn
 
 def get_local_virtual_disk_partitions(partitions):

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
----------------------------------------------------------------------
diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java 
b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
index 53d9ec2..1e083df 100644
--- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
+++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java
@@ -389,6 +389,7 @@ public class VXQuery {
             ncConfig.dataIPAddress = "127.0.0.1";
             ncConfig.datasetIPAddress = "127.0.0.1";
             ncConfig.nodeId = "nc" + (i + 1);
+            ncConfig.ioDevices = "/tmp";
             ncs[i] = new NodeControllerService(ncConfig);
             ncs[i].start();
         }
@@ -450,7 +451,7 @@ public class VXQuery {
         private int frameSize = 65536;
 
         @Option(name = "-join-hash-size", usage = "Join hash size in bytes. 
(default 67,108,864)")
-        private int joinHashSize = -1;
+        private long joinHashSize = -1;
 
         @Option(name = "-buffer-size", usage = "Disk read buffer size in 
bytes.")
         private int bufferSize = -1;

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java
index db7ecf7..bee221d 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java
@@ -40,10 +40,12 @@ public class DictionaryBuilder {
 
     private final DataOutput dataBufferOut;
 
-    private final ByteArrayAccessibleOutputStream tempStringData;
+    private final ArrayBackedValueStorage cache;
 
     private final TreeMap<String, Integer> hashSlotIndexes;
 
+    private boolean cacheReady;
+
     private final IValueReferenceVector sortedStringsVector = new 
IValueReferenceVector() {
         @Override
         public int getStart(int index) {
@@ -74,16 +76,27 @@ public class DictionaryBuilder {
         sortedSlotIndexes = new GrowableIntArray();
         dataBuffer = new ByteArrayAccessibleOutputStream();
         dataBufferOut = new DataOutputStream(dataBuffer);
-        tempStringData = new ByteArrayAccessibleOutputStream();
+        cache = new ArrayBackedValueStorage();
         hashSlotIndexes = new TreeMap<String, Integer>();
+        cacheReady = false;
     }
 
     public void reset() {
         stringEndOffsets.clear();
         sortedSlotIndexes.clear();
         dataBuffer.reset();
-        tempStringData.reset();
         hashSlotIndexes.clear();
+        cacheReady = false;
+    }
+
+    public void writeFromCache(ArrayBackedValueStorage abvs) throws 
IOException {
+        if (!cacheReady) {
+            cache.reset();
+            write(cache);
+            cacheReady = true;
+        }
+        DataOutput out = abvs.getDataOutput();
+        out.write(cache.getByteArray(), cache.getStartOffset(), 
cache.getLength());
     }
 
     public void write(ArrayBackedValueStorage abvs) throws IOException {
@@ -122,6 +135,7 @@ public class DictionaryBuilder {
             }
             stringEndOffsets.append(dataBuffer.size());
             hashSlotIndexes.put(str, slotIndex);
+            cacheReady = false;
         }
         return slotIndex;
     }
@@ -141,6 +155,7 @@ public class DictionaryBuilder {
         }
         stringEndOffsets.append(dataBuffer.size());
         sortedSlotIndexes.insert(index, slotIndex);
+        cacheReady = false;
         return slotIndex;
     }
 }
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java
new file mode 100644
index 0000000..3b4eea0
--- /dev/null
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java
@@ -0,0 +1,53 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.vxquery.datamodel.builders.nodes;
+
+import java.io.DataOutput;
+import java.io.IOException;
+import java.io.UTFDataFormatException;
+
+import org.apache.vxquery.runtime.functions.util.FunctionHelper;
+
+import edu.uci.ics.hyracks.data.std.api.IMutableValueStorage;
+import edu.uci.ics.hyracks.data.std.primitive.BytePointable;
+
+public class UTF8StringBuilder extends AbstractNodeBuilder {
+    private IMutableValueStorage mvs;
+    private DataOutput out;
+
+    @Override
+    public void reset(IMutableValueStorage mvs) throws IOException {
+        this.mvs = mvs;
+        out = mvs.getDataOutput();
+        out.write(0);
+        out.write(0);
+    }
+
+    @Override
+    public void finish() throws IOException {
+        int utflen = mvs.getLength() - 2;
+        BytePointable.setByte(mvs.getByteArray(), 0, (byte) ((utflen >>> 8) & 
0xFF));
+        BytePointable.setByte(mvs.getByteArray(), 1, (byte) ((utflen >>> 0) & 
0xFF));
+    }
+
+    public void appendCharArray(char[] ch, int start, int length) throws 
IOException {
+        FunctionHelper.writeCharArray(ch, start, length, out);
+        if (mvs.getLength() > 65535) {
+            throw new UTFDataFormatException("encoded string too long: " + 
mvs.getLength() + " bytes");
+        }
+    }
+}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java
index ea65066..dbae9de 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java
@@ -43,6 +43,7 @@ public class ChildPathStepUnnesting extends 
AbstractForwardAxisPathStep {
     private final TaggedValuePointable tvpNtp = (TaggedValuePointable) 
TaggedValuePointable.FACTORY.createPointable();
     private final TaggedValuePointable tvpStep = (TaggedValuePointable) 
TaggedValuePointable.FACTORY.createPointable();
     INodeFilter filter;
+    int filterLookupID = -1;
 
     public ChildPathStepUnnesting(IHyracksTaskContext ctx, PointablePool pp) {
         super(ctx, pp);
@@ -56,9 +57,12 @@ public class ChildPathStepUnnesting extends 
AbstractForwardAxisPathStep {
             throw new IllegalArgumentException("Expected int value tag, got: " 
+ args[1].getTag());
         }
         args[1].getValue(ip);
-        SequenceType sType = 
dCtx.getStaticContext().lookupSequenceType(ip.getInteger());
-        filter = NodeTestFilter.getNodeTestFilter(sType);
-
+        if (ip.getInteger() != filterLookupID) {
+            filterLookupID = ip.getInteger();
+            SequenceType sType = 
dCtx.getStaticContext().lookupSequenceType(ip.getInteger());
+            filter = NodeTestFilter.getNodeTestFilter(sType);
+        }
+        
         if (args[0].getTag() == ValueTag.SEQUENCE_TAG) {
             args[0].getValue(seqNtp);
             seqArgsLength = seqNtp.getEntryCount();

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
index 3b60e4e..3a20a86 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java
@@ -1260,6 +1260,12 @@ public class FunctionHelper {
         }
     }
 
+    public static void writeCharArray(char[] ch, int start, int length, 
DataOutput dOut) {
+        for (int i = start; i < start + length; ++i) {
+            writeChar(ch[i], dOut);
+        }
+    }
+
     public static void writeDateAsString(IDate date, DataOutput dOut) {
         // Year
         writeNumberWithPadding(date.getYear(), 4, dOut);

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
index 37ce001..e40147e 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java
@@ -30,6 +30,7 @@ import 
org.apache.vxquery.datamodel.builders.nodes.DocumentNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.ElementNodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.PINodeBuilder;
 import org.apache.vxquery.datamodel.builders.nodes.TextNodeBuilder;
+import org.apache.vxquery.datamodel.builders.nodes.UTF8StringBuilder;
 import org.apache.vxquery.datamodel.values.ValueTag;
 import org.apache.vxquery.types.BuiltinTypeQNames;
 import org.apache.vxquery.types.ElementType;
@@ -59,6 +60,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
     private final DocumentNodeBuilder docb;
     private final PINodeBuilder pinb;
     private final TextNodeBuilder tnb;
+    private final UTF8StringBuilder utf8b;
     private final List<ElementNodeBuilder> enbStack;
     private final List<ElementNodeBuilder> freeENBList;
 
@@ -78,16 +80,14 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     // Basic tracking and setting variables
     private final boolean attachTypes;
-    private final StringBuilder buffer;
     private final boolean createNodeIds;
     private int depth;
-    private final ArrayBackedValueStorage docABVS;
-    private final ArrayBackedValueStorage elementABVS;
+    private final ArrayBackedValueStorage resultABVS;
     private boolean pendingText;
     private int nodeIdCounter;
     private final ITreeNodeIdProvider nodeIdProvider;
-    private final ArrayBackedValueStorage resultABVS;
     private final ArrayBackedValueStorage tempABVS;
+    private final ArrayBackedValueStorage textABVS;
 
     public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider 
nodeIdProvider) {
         // XML node builders
@@ -97,25 +97,24 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         docb = new DocumentNodeBuilder();
         pinb = new PINodeBuilder();
         tnb = new TextNodeBuilder();
+        utf8b = new UTF8StringBuilder();
         enbStack = new ArrayList<ElementNodeBuilder>();
         freeENBList = new ArrayList<ElementNodeBuilder>();
-        
+
         // Element writing and path step variables
         skipping = true;
         tvp = (TaggedValuePointable) 
TaggedValuePointable.FACTORY.createPointable();
-            
+
         // Basic tracking and setting variables
         this.attachTypes = attachTypes;
-        buffer = new StringBuilder();
         createNodeIds = nodeIdProvider != null;
         depth = 0;
-        docABVS = new ArrayBackedValueStorage();
-        elementABVS = new ArrayBackedValueStorage();
+        resultABVS = new ArrayBackedValueStorage();
         pendingText = false;
         nodeIdCounter = 0;
         this.nodeIdProvider = nodeIdProvider;
-        resultABVS = new ArrayBackedValueStorage();
         tempABVS = new ArrayBackedValueStorage();
+        textABVS = new ArrayBackedValueStorage();
     }
 
     public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider 
nodeIdProvider, ByteBuffer frame,
@@ -141,7 +140,7 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             ElementType eType = (ElementType) nodeType;
             NameTest nameTest = eType.getNameTest();
             childUri[index] = getStringFromBytes(nameTest.getUri());
-            childLocalName[index] = 
getStringFromBytes(nameTest.getLocalName());;
+            childLocalName[index] = 
getStringFromBytes(nameTest.getLocalName());
             ++index;
         }
     }
@@ -157,7 +156,12 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         if (skipping) {
             return;
         }
-        buffer.append(ch, start, length);
+        try {
+            utf8b.appendCharArray(ch, start, length);
+        } catch (IOException e) {
+            e.printStackTrace();
+            throw new SAXException(e);
+        }
         pendingText = true;
     }
 
@@ -255,12 +259,18 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
             skipping = false;
         }
         db.reset();
-        docABVS.reset();
+        try {
+            textABVS.reset();
+            utf8b.reset(textABVS);
+        } catch (IOException e) {
+            throw new SAXException(e);
+        }
         if (skipping) {
             return;
         }
         try {
-            docb.reset(docABVS);
+            resultABVS.reset();
+            docb.reset(resultABVS);
             if (createNodeIds) {
                 docb.setLocalNodeId(nodeIdCounter++);
             }
@@ -363,15 +373,15 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         try {
             flushText();
             startChildInParent(cnb);
-            buffer.append(ch, start, length);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(buffer.toString());
             if (createNodeIds) {
                 cnb.setLocalNodeId(nodeIdCounter++);
             }
-            cnb.setValue(tempABVS);
+            utf8b.appendCharArray(ch, start, length);
+            utf8b.finish();
+            cnb.setValue(textABVS);
             endChildInParent(cnb);
-            buffer.delete(0, buffer.length());
+            textABVS.reset();
+            utf8b.reset(textABVS);
         } catch (IOException e) {
             e.printStackTrace();
             throw new SAXException(e);
@@ -381,14 +391,14 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
     private void flushText() throws IOException {
         if (pendingText) {
             peekENBStackTop().startChild(tnb);
-            tempABVS.reset();
-            tempABVS.getDataOutput().writeUTF(buffer.toString());
             if (createNodeIds) {
                 tnb.setLocalNodeId(nodeIdCounter++);
             }
-            tnb.setValue(tempABVS);
+            utf8b.finish();
+            tnb.setValue(textABVS);
             peekENBStackTop().endChild(tnb);
-            buffer.delete(0, buffer.length());
+            textABVS.reset();
+            utf8b.reset(textABVS);
             pendingText = false;
         }
     }
@@ -418,8 +428,8 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
     }
 
     public void writeElement() throws IOException {
-        resultABVS.reset();
-        DataOutput out = resultABVS.getDataOutput();
+        tempABVS.reset();
+        DataOutput out = tempABVS.getDataOutput();
         out.write(ValueTag.NODE_TREE_TAG);
         byte header = NodeTreePointable.HEADER_DICTIONARY_EXISTS_MASK;
         if (attachTypes) {
@@ -432,13 +442,9 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         if (createNodeIds) {
             out.writeInt(nodeIdProvider.getId());
         }
-        db.write(resultABVS);
-        if (subElement == null) {
-            out.write(docABVS.getByteArray(), docABVS.getStartOffset(), 
docABVS.getLength());
-        } else {
-            out.write(elementABVS.getByteArray(), 
elementABVS.getStartOffset(), elementABVS.getLength());
-        }
-        tvp.set(resultABVS.getByteArray(), resultABVS.getStartOffset(), 
resultABVS.getLength());
+        db.writeFromCache(tempABVS);
+        out.write(resultABVS.getByteArray(), resultABVS.getStartOffset(), 
resultABVS.getLength());
+        tvp.set(tempABVS.getByteArray(), tempABVS.getStartOffset(), 
tempABVS.getLength());
         addNodeToTuple(tvp, tupleIndex);
         skipping = true;
     }
@@ -457,8 +463,8 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
         if (createNodeIds) {
             out.writeInt(nodeIdProvider.getId());
         }
-        db.write(abvs);
-        out.write(docABVS.getByteArray(), docABVS.getStartOffset(), 
docABVS.getLength());
+        db.writeFromCache(abvs);
+        out.write(resultABVS.getByteArray(), resultABVS.getStartOffset(), 
resultABVS.getLength());
     }
 
     private ElementNodeBuilder createENB() {
@@ -482,8 +488,8 @@ public class SAXContentHandler implements ContentHandler, 
LexicalHandler {
 
     private void startChildInParent(AbstractNodeBuilder anb, boolean 
startNewElement) throws IOException {
         if (startNewElement) {
-            elementABVS.reset();
-            anb.reset(elementABVS);
+            resultABVS.reset();
+            anb.reset(resultABVS);
         } else if (enbStack.isEmpty()) {
             docb.startChild(anb);
         } else {

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
----------------------------------------------------------------------
diff --git 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
index 44f2179..0e18328 100644
--- 
a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
+++ 
b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java
@@ -94,7 +94,7 @@ public class XMLQueryCompiler {
     }
 
     public XMLQueryCompiler(XQueryCompilationListener listener, String[] 
nodeList, int frameSize,
-            int availableProcessors, int joinHashSize) {
+            int availableProcessors, long joinHashSize) {
         this.listener = listener == null ? 
NoopXQueryCompilationListener.INSTANCE : listener;
         this.frameSize = frameSize;
         this.nodeList = nodeList;
@@ -113,8 +113,12 @@ public class XMLQueryCompiler {
                 });
         builder.getPhysicalOptimizationConfig().setFrameSize(this.frameSize);
         if (joinHashSize > 0) {
-            
builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinHashSize/this.frameSize);
+            
builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash((int) 
(joinHashSize / this.frameSize));
         }
+
+        
builder.getPhysicalOptimizationConfig().setMaxFramesLeftInputHybridHash(
+                (int) (60L * 1024 * 1048576 / this.frameSize));
+
         builder.setLogicalRewrites(buildDefaultLogicalRewrites());
         builder.setPhysicalRewrites(buildDefaultPhysicalRewrites());
         builder.setSerializerDeserializerProvider(new 
ISerializerDeserializerProvider() {

http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-server/pom.xml
----------------------------------------------------------------------
diff --git a/vxquery-server/pom.xml b/vxquery-server/pom.xml
index 6c99712..dfd3ab9 100644
--- a/vxquery-server/pom.xml
+++ b/vxquery-server/pom.xml
@@ -14,153 +14,172 @@
   See the License for the specific language governing permissions and
   limitations under the License.
 -->
-<project xmlns="http://maven.apache.org/POM/4.0.0" 
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" 
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
-  <modelVersion>4.0.0</modelVersion>
+<project
+    xmlns="http://maven.apache.org/POM/4.0.0"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 
http://maven.apache.org/maven-v4_0_0.xsd">
+    <modelVersion>4.0.0</modelVersion>
 
-  <parent>
-    <groupId>org.apache.vxquery</groupId>
-    <artifactId>apache-vxquery</artifactId>
-    <version>0.5-SNAPSHOT</version>
-  </parent>
+    <parent>
+        <groupId>org.apache.vxquery</groupId>
+        <artifactId>apache-vxquery</artifactId>
+        <version>0.5-SNAPSHOT</version>
+    </parent>
 
-  <artifactId>apache-vxquery-server</artifactId>
-  <packaging>jar</packaging>
-  <name>VXQuery Server</name>
-  <description>Apache VXQuery Server</description>
+    <artifactId>apache-vxquery-server</artifactId>
+    <packaging>jar</packaging>
+    <name>VXQuery Server</name>
+    <description>Apache VXQuery Server</description>
 
-  <distributionManagement>
-    <site>
-      <id>vxquery.website</id>
-      <name>VXQuery Website</name>
-      <url>file:../site/vxquery-server/</url>
-    </site>
-  </distributionManagement>
+    <distributionManagement>
+        <site>
+            <id>vxquery.website</id>
+            <name>VXQuery Website</name>
+            <url>file:../site/vxquery-server/</url>
+        </site>
+    </distributionManagement>
 
-  <build>
-    <plugins>
-      <plugin>
-        <groupId>org.codehaus.mojo</groupId>
-        <artifactId>appassembler-maven-plugin</artifactId>
-        <version>1.1.1</version>
-        <executions>
-          <execution>
-            <configuration>
-              <programs>
-                <program>
-                  
<mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass>
-                  <name>vxqueryshutdown</name>
-                </program>
-                <program>
-                  
<mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
-                  <name>vxquerycc</name>
-                </program>
-                <program>
-                  
<mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass>
-                  <name>vxquerync</name>
-                </program>
-              </programs>
-              <repositoryLayout>flat</repositoryLayout>
-              <repositoryName>lib</repositoryName>
-            </configuration>
-            <phase>package</phase>
-            <goals>
-              <goal>assemble</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <artifactId>maven-resources-plugin</artifactId>
-        <version>2.5</version>
-        <executions>
-          <execution>
-            <id>copy-scripts</id>
-            <!-- here the phase you need -->
-            <phase>package</phase>
-            <goals>
-              <goal>copy-resources</goal>
-            </goals>
-            <configuration>
-              <outputDirectory>target/appassembler/bin</outputDirectory>
-              <resources>
-                <resource>
-                  <directory>src/main/resources/scripts</directory>
-                </resource>
-              </resources>
-            </configuration>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <artifactId>maven-assembly-plugin</artifactId>
-        <version>2.4</version>
-        <executions>
-          <execution>
-            <configuration>
-              <descriptors>
-                <descriptor>src/main/assembly/binary-assembly.xml</descriptor>
-              </descriptors>
-            </configuration>
-            <phase>package</phase>
-            <goals>
-              <goal>attached</goal>
-            </goals>
-          </execution>
-        </executions>
-      </plugin>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-site-plugin</artifactId>
-      </plugin>
-      <plugin>
-        <artifactId>maven-antrun-plugin</artifactId>
-        <executions>
-          <execution>
-            <id>generate-site</id>
-            <phase>none</phase>
-          </execution>          
-        </executions>
-      </plugin>
-    </plugins>
-  </build>
-  
-  <dependencies>
-    <dependency>
-      <groupId>org.apache.vxquery</groupId>
-      <artifactId>apache-vxquery-core</artifactId>
-      <version>0.5-SNAPSHOT</version>
-    </dependency>
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>appassembler-maven-plugin</artifactId>
+                <version>1.1.1</version>
+                <executions>
+                    <execution>
+                        <configuration>
+                            <programs>
+                                <program>
+                                    
<mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass>
+                                    <name>vxqueryshutdown</name>
+                                </program>
+                                <program>
+                                    
<mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass>
+                                    <name>vxquerycc</name>
+                                </program>
+                                <program>
+                                    
<mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass>
+                                    <name>vxquerync</name>
+                                </program>
+                            </programs>
+                            <repositoryLayout>flat</repositoryLayout>
+                            <repositoryName>lib</repositoryName>
+                        </configuration>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>assemble</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-resources-plugin</artifactId>
+                <version>2.5</version>
+                <executions>
+                    <execution>
+                        <id>copy-scripts</id>
+                        <!-- here the phase you need -->
+                        <phase>package</phase>
+                        <goals>
+                            <goal>copy-resources</goal>
+                        </goals>
+                        <configuration>
+                            
<outputDirectory>target/appassembler/bin</outputDirectory>
+                            <resources>
+                                <resource>
+                                    
<directory>src/main/resources/scripts</directory>
+                                </resource>
+                            </resources>
+                        </configuration>
+                    </execution>
+                    <execution>
+                        <id>copy-conf</id>
+                        <!-- here the phase you need -->
+                        <phase>package</phase>
+                        <goals>
+                            <goal>copy-resources</goal>
+                        </goals>
+                        <configuration>
+                            
<outputDirectory>target/appassembler/conf</outputDirectory>
+                            <resources>
+                                <resource>
+                                    
<directory>src/main/resources/conf</directory>
+                                </resource>
+                            </resources>
+                        </configuration>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <artifactId>maven-assembly-plugin</artifactId>
+                <version>2.4</version>
+                <executions>
+                    <execution>
+                        <configuration>
+                            <descriptors>
+                                
<descriptor>src/main/assembly/binary-assembly.xml</descriptor>
+                            </descriptors>
+                        </configuration>
+                        <phase>package</phase>
+                        <goals>
+                            <goal>attached</goal>
+                        </goals>
+                    </execution>
+                </executions>
+            </plugin>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-site-plugin</artifactId>
+            </plugin>
+            <plugin>
+                <artifactId>maven-antrun-plugin</artifactId>
+                <executions>
+                    <execution>
+                        <id>generate-site</id>
+                        <phase>none</phase>
+                    </execution>
+                </executions>
+            </plugin>
+        </plugins>
+    </build>
 
-    <dependency>
-      <groupId>edu.uci.ics.hyracks</groupId>
-      <artifactId>hyracks-control-cc</artifactId>
-    </dependency>
-    
-    <dependency>
-      <groupId>edu.uci.ics.hyracks</groupId>
-      <artifactId>hyracks-control-nc</artifactId>
-    </dependency>
-  </dependencies>
+    <dependencies>
+        <dependency>
+            <groupId>org.apache.vxquery</groupId>
+            <artifactId>apache-vxquery-core</artifactId>
+            <version>0.5-SNAPSHOT</version>
+        </dependency>
 
-  <reporting>
-    <plugins>
-      <plugin>
-        <groupId>org.apache.maven.plugins</groupId>
-        <artifactId>maven-project-info-reports-plugin</artifactId>
-        <reportSets>
-          <reportSet>
-            <reports>
-              <report>index</report>
-              <report>dependencies</report>
-              <report>plugins</report>
-            </reports>
-          </reportSet>
-        </reportSets>
-        <configuration>
-          <linkOnly>true</linkOnly>
-          <dependencyLocationsEnabled>false</dependencyLocationsEnabled>
-        </configuration>
-      </plugin>
-    </plugins>
-  </reporting>
+        <dependency>
+            <groupId>edu.uci.ics.hyracks</groupId>
+            <artifactId>hyracks-control-cc</artifactId>
+        </dependency>
+
+        <dependency>
+            <groupId>edu.uci.ics.hyracks</groupId>
+            <artifactId>hyracks-control-nc</artifactId>
+        </dependency>
+    </dependencies>
+
+    <reporting>
+        <plugins>
+            <plugin>
+                <groupId>org.apache.maven.plugins</groupId>
+                <artifactId>maven-project-info-reports-plugin</artifactId>
+                <reportSets>
+                    <reportSet>
+                        <reports>
+                            <report>index</report>
+                            <report>dependencies</report>
+                            <report>plugins</report>
+                        </reports>
+                    </reportSet>
+                </reportSets>
+                <configuration>
+                    <linkOnly>true</linkOnly>
+                    
<dependencyLocationsEnabled>false</dependencyLocationsEnabled>
+                </configuration>
+            </plugin>
+        </plugins>
+    </reporting>
 </project>

Reply via email to