Recent round of changes to improve performance in the benchmarks. - Added more queries to help identify the root causes of performance issues. - The join hash size is now allowed to be larger than 2G (uses a long instead of an int). - The dictionary for node trees is now cached. - In child unnesting, the filter is now cached between calls. - Created a string builder to help with parsing. (Parsing now skips the conversion to a string.) - Added CC and NC startup options in a new cluster.properties file.
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/b665db73 Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/b665db73 Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/b665db73 Branch: refs/heads/prestonc/november_update Commit: b665db73434fa4d4f787258981e16c2e7e5a4fc4 Parents: 99ba4db Author: Preston Carman <[email protected]> Authored: Thu Dec 18 09:58:23 2014 -0800 Committer: Preston Carman <[email protected]> Committed: Thu Dec 18 09:58:23 2014 -0800 ---------------------------------------------------------------------- .../other_systems/mrql/q07_count_1940.mrql | 30 ++ .../other_systems/mrql/q07_count_1960.mrql | 30 ++ .../other_systems/mrql/q07_count_1980.mrql | 30 ++ .../other_systems/mrql/q07_count_2000.mrql | 30 ++ .../other_systems/mrql/q07_count_join.mrql | 6 +- .../other_systems/mrql/q07_count_left.mrql | 21 ++ .../other_systems/mrql/q07_count_tmax.mrql | 2 +- .../other_systems/mrql/q07_count_tmin.mrql | 2 +- .../other_systems/mrql/q07_data_tmax.mrql | 20 ++ .../other_systems/mrql/q07_data_tmin.mrql | 20 ++ .../other_systems/mrql/q07_filter_1940.mrql | 30 ++ .../other_systems/mrql/q07_filter_1960.mrql | 30 ++ .../other_systems/mrql/q07_filter_1980.mrql | 30 ++ .../other_systems/mrql/q07_filter_2000.mrql | 30 ++ .../mrql_scripts/run_group_test.sh | 4 +- .../noaa-ghcn-daily/queries/q07_count_1940.xq | 35 +++ .../noaa-ghcn-daily/queries/q07_count_1960.xq | 35 +++ .../noaa-ghcn-daily/queries/q07_count_1980.xq | 35 +++ .../noaa-ghcn-daily/queries/q07_count_2000.xq | 35 +++ .../noaa-ghcn-daily/queries/q07_count_left.xq | 27 ++ .../noaa-ghcn-daily/queries/q07_data_tmax.xq | 26 ++ .../noaa-ghcn-daily/queries/q07_data_tmin.xq | 26 ++ .../noaa-ghcn-daily/queries/q07_filter_1940.xq | 35 +++ .../noaa-ghcn-daily/queries/q07_filter_1960.xq | 35 +++ .../noaa-ghcn-daily/queries/q07_filter_1980.xq | 35 +++ .../noaa-ghcn-daily/queries/q07_filter_2000.xq | 35 +++ 
.../noaa-ghcn-daily/scripts/run_benchmark.sh | 4 +- .../scripts/run_benchmark_cluster.sh | 9 +- .../scripts/testing_logging.properties | 10 +- .../scripts/weather_benchmark.py | 13 +- .../java/org/apache/vxquery/cli/VXQuery.java | 3 +- .../builders/nodes/DictionaryBuilder.java | 21 +- .../builders/nodes/UTF8StringBuilder.java | 53 ++++ .../functions/step/ChildPathStepUnnesting.java | 10 +- .../runtime/functions/util/FunctionHelper.java | 6 + .../vxquery/xmlparser/SAXContentHandler.java | 78 ++--- .../xmlquery/query/XMLQueryCompiler.java | 8 +- vxquery-server/pom.xml | 305 ++++++++++--------- .../src/main/resources/conf/cluster.properties | 54 ++++ .../src/main/resources/scripts/startcc.sh | 21 +- .../src/main/resources/scripts/startnc.sh | 19 +- .../src/main/resources/scripts/stopcc.sh | 4 + .../src/main/resources/scripts/stopcluster.sh | 12 +- .../src/main/resources/scripts/stopnc.sh | 4 + 44 files changed, 1097 insertions(+), 211 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql new file mode 100644 index 0000000..268f211 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1940.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +count( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 1960 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 1960 +) +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql new file mode 100644 index 0000000..268f211 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1960.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +count( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 1960 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 1960 +) +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql new file mode 100644 index 0000000..2921cf6 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_1980.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +count( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 1980 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 1980 +) +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql new file mode 100644 index 0000000..8bfcb63 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_2000.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +count( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 2000 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 2000 +) +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql index 8dec470..c48ae67 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_join.mrql @@ -17,10 +17,10 @@ count( select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) from rtmax in source(xml, args[0], {"data"}), - rtmin in source(xml, args[0], {"data"}) + rtmin in source(xml, args[2], {"data"}) where text(rtmax.date) = text(rtmin.date) and text(rtmax.station) = 
text(rtmin.station) - and text(r.dataType) = "TMAX" - and text(r.dataType) = "TMIN" + and text(rtmax.dataType) = "TMAX" + and text(rtmin.dataType) = "TMIN" ) ; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql new file mode 100644 index 0000000..c4325f3 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_left.mrql @@ -0,0 +1,21 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +count( + select (rtmin) + from rtmin in source(xml, args[0], {"data"}) +) +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql index ca8ab4c..49cca89 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmax.mrql @@ -17,6 +17,6 @@ count( select (rtmax) from rtmax in source(xml, args[0], {"data"}) - where text(r.dataType) = "TMAX" + where text(rtmax.dataType) = "TMAX" ) ; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql index fe17ebe..00ae56f 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_count_tmin.mrql @@ -17,6 +17,6 @@ count( select (rtmin) from rtmin in source(xml, args[0], {"data"}) - where text(r.dataType) = "TMIN" + where text(rtmin.dataType) = "TMIN" ) ; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql ---------------------------------------------------------------------- diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql new file mode 100644 index 0000000..374a0e3 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmax.mrql @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + select (rtmax) + from rtmax in source(xml, args[0], {"data"}) + where text(r.dataType) = "TMAX" +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql new file mode 100644 index 0000000..10e2fe9 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_data_tmin.mrql @@ -0,0 +1,20 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + select (rtmin) + from rtmin in source(xml, args[0], {"data"}) + where text(r.dataType) = "TMIN" +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql new file mode 100644 index 0000000..54699fc --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1940.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +avg( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 1960 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 1960 +) / 10 +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql new file mode 100644 index 0000000..54699fc --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1960.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +avg( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 1960 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 1960 +) / 10 +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql new file mode 100644 index 0000000..bff559b --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_1980.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +avg( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 1980 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 1980 +) / 10 +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql new file mode 100644 index 0000000..052a0c9 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/q07_filter_2000.mrql @@ -0,0 +1,30 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +avg( + select (toInt(text(rtmax.value))-toInt(text(rtmin.value))) + from rtmax in source(xml, args[0], {"data"}), + dtmax in rtmax.date, + rtmin in source(xml, args[2], {"data"}), + dtmin in rtmin.date + where text(rtmax.date) = text(rtmin.date) + and text(rtmax.station) = text(rtmin.station) + and text(rtmax.dataType) = "TMAX" + and toInt(substring(text(dtmax), 0, 4)) > 2000 + and text(rtmin.dataType) = "TMIN" + and toInt(substring(text(dtmin), 0, 4)) > 2000 +) / 10 +; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh index 134c05e..84028e5 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh @@ -32,8 +32,8 @@ fi DATASET=${1} NODES=${2} REPEAT=1 -#DATA_FILES=${NODES} -DATA_FILES=8 +DATA_FILES=${NODES} +#DATA_FILES=8 # Start Hadoop sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq ---------------------------------------------------------------------- diff --git 
a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq new file mode 100644 index 0000000..c70b4d2 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1940.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. 
:) +fn:count( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1940 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1940 + return $r_max/value - $r_min/value +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq new file mode 100644 index 0000000..26b08f9 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1960.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
:) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. :) +fn:count( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1960 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1960 + return $r_max/value - $r_min/value +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq new file mode 100644 index 0000000..daa760b --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_1980.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. :) +fn:count( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1980 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1980 + return $r_max/value - $r_min/value +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq new file mode 100644 index 0000000..b905807 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_2000.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. :) +fn:count( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 2000 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 2000 + return $r_max/value - $r_min/value +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq new file mode 100644 index 0000000..0639ff4 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_count_left.xq @@ -0,0 +1,27 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor 
license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: +XQuery Join Query +------------------- +Find the all the records for TMIN. +:) +fn:count( + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + return $r_max +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq new file mode 100644 index 0000000..e511918 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmax.xq @@ -0,0 +1,26 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: +XQuery Join Query +------------------- +Find the all the records for TMAX. +:) + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + where $r_max/dataType eq "TMAX" + return $r_max http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq new file mode 100644 index 0000000..579859f --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_data_tmin.xq @@ -0,0 +1,26 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. 
See the License for the + specific language governing permissions and limitations + under the License. :) + +(: +XQuery Join Query +------------------- +Find the all the records for TMIN. +:) + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + where $r_min/dataType eq "TMIN" + return $r_min http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq new file mode 100644 index 0000000..e122494 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1940.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. 
:) +fn:avg( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1940 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1940 + return $r_max/value - $r_min/value +) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq new file mode 100644 index 0000000..7888560 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1960.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. 
:) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. :) +fn:avg( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1960 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1960 + return $r_max/value - $r_min/value +) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq new file mode 100644 index 0000000..0b0cbd3 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_1980.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. :) +fn:avg( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 1980 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 1980 + return $r_max/value - $r_min/value +) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq new file mode 100644 index 0000000..b343bd0 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_filter_2000.xq @@ -0,0 +1,35 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. :) +fn:avg( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_min/date))) gt 2000 + and $r_max/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r_max/date))) gt 2000 + return $r_max/value - $r_min/value +) div 10 \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh index 8bc6772..c4d8922 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh @@ -29,7 +29,7 @@ REPEAT=5 IGNORE=2 
FRAME_SIZE=$((8*1024)) BUFFER_SIZE=$((32*1024*1024)) -JOIN_HASH_SIZE=$(( 4 * (64*1024*1024) )) +JOIN_HASH_SIZE=$(( 6 * (1024*1024*1024) / 8 )) if [ -z "${1}" ] then @@ -37,7 +37,7 @@ then exit fi -export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties" +export JAVA_OPTS="$JAVA_OPTS -server -Xmx7G -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties" for j in $(find ${1} -name '*q??.xq') do http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh index c90a7a9..7a0c7a9 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark_cluster.sh @@ -25,11 +25,12 @@ # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "-client-net-ip-address 169.235.27.138" # run_benchmark.sh ./noaa-ghcn-daily/benchmarks/local_speed_up/queries/ "" q03 # -CLUSTER="uci" -REPEAT=5 +CLUSTER="rita" +REPEAT=1 FRAME_SIZE=$((8*1024)) BUFFER_SIZE=$((32*1024*1024)) -JOIN_HASH_SIZE=$((4*4*64*1024*1024)) +JOIN_HASH_SIZE=$((1024*1024*1024)) +#JOIN_HASH_SIZE=-1 if [ -z "${1}" ] then @@ -50,7 +51,7 @@ python vxquery-server/src/main/resources/scripts/cluster_cli.py -c vxquery-serve # wait for cluster to finish setting up sleep 5 -export JAVA_OPTS="$JAVA_OPTS -server -Xmx8G -XX:+HeapDumpOnOutOfMemoryError 
-Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/benchmark_logging.properties" +export JAVA_OPTS="$JAVA_OPTS -server -Xmx7G -Djava.util.logging.config.file=./vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties" for j in $(find ${1} -name '*q??.xq') do http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties index ff877dd..672e456 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/testing_logging.properties @@ -43,9 +43,9 @@ handlers= java.util.logging.ConsoleHandler # Note that the ConsoleHandler also has a separate level # setting to limit messages printed to the console. 
-#.level= WARNING -# .level= INFO -.level= FINE +# .level= WARNING + .level= INFO +# .level= FINE # .level = FINEST ############################################################ @@ -75,5 +75,5 @@ java.util.logging.ConsoleHandler.formatter = java.util.logging.SimpleFormatter # messages: # edu.uci.ics.asterix.level = FINE -edu.uci.ics.algebricks.level = FINE -# edu.uci.ics.hyracks.level = FINE +# edu.uci.ics.hyracks.algebricks.level = FINE +edu.uci.ics.hyracks.level = FINE http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py index 746fef4..fbd7b04 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py @@ -59,9 +59,20 @@ class WeatherBenchmark: "q06_count_join.xq", "q06_count_sensor.xq", "q06_count_station.xq", + "q07_count_1940.xq", + "q07_count_1960.xq", + "q07_count_1980.xq", + "q07_count_2000.xq", "q07_count_join.xq", + "q07_count_left.xq", "q07_count_tmin.xq", "q07_count_tmax.xq", + "q07_data_tmin.xq", + "q07_data_tmax.xq", + "q07_filter_1940.xq", + "q07_filter_1960.xq", + "q07_filter_1980.xq", + "q07_filter_2000.xq", ] BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] @@ -364,7 +375,7 @@ def get_cluster_query_path(base_paths, test, partition, nodes): def get_cluster_virtual_disk_partitions(nodes, partitions): vp = get_local_virtual_disk_partitions(partitions) - vn = calculate_partitions(range(1, len(nodes)+1, 1)) + vn = calculate_partitions(range(1, len(nodes) + 1, 1)) return vp * vn def 
get_local_virtual_disk_partitions(partitions): http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java ---------------------------------------------------------------------- diff --git a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java index 53d9ec2..1e083df 100644 --- a/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java +++ b/vxquery-cli/src/main/java/org/apache/vxquery/cli/VXQuery.java @@ -389,6 +389,7 @@ public class VXQuery { ncConfig.dataIPAddress = "127.0.0.1"; ncConfig.datasetIPAddress = "127.0.0.1"; ncConfig.nodeId = "nc" + (i + 1); + ncConfig.ioDevices = "/tmp"; ncs[i] = new NodeControllerService(ncConfig); ncs[i].start(); } @@ -450,7 +451,7 @@ public class VXQuery { private int frameSize = 65536; @Option(name = "-join-hash-size", usage = "Join hash size in bytes. (default 67,108,864)") - private int joinHashSize = -1; + private long joinHashSize = -1; @Option(name = "-buffer-size", usage = "Disk read buffer size in bytes.") private int bufferSize = -1; http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java index db7ecf7..bee221d 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/DictionaryBuilder.java @@ -40,10 +40,12 @@ public class DictionaryBuilder { private final DataOutput dataBufferOut; - private final ByteArrayAccessibleOutputStream tempStringData; + private final ArrayBackedValueStorage cache; 
private final TreeMap<String, Integer> hashSlotIndexes; + private boolean cacheReady; + private final IValueReferenceVector sortedStringsVector = new IValueReferenceVector() { @Override public int getStart(int index) { @@ -74,16 +76,27 @@ public class DictionaryBuilder { sortedSlotIndexes = new GrowableIntArray(); dataBuffer = new ByteArrayAccessibleOutputStream(); dataBufferOut = new DataOutputStream(dataBuffer); - tempStringData = new ByteArrayAccessibleOutputStream(); + cache = new ArrayBackedValueStorage(); hashSlotIndexes = new TreeMap<String, Integer>(); + cacheReady = false; } public void reset() { stringEndOffsets.clear(); sortedSlotIndexes.clear(); dataBuffer.reset(); - tempStringData.reset(); hashSlotIndexes.clear(); + cacheReady = false; + } + + public void writeFromCache(ArrayBackedValueStorage abvs) throws IOException { + if (!cacheReady) { + cache.reset(); + write(cache); + cacheReady = true; + } + DataOutput out = abvs.getDataOutput(); + out.write(cache.getByteArray(), cache.getStartOffset(), cache.getLength()); } public void write(ArrayBackedValueStorage abvs) throws IOException { @@ -122,6 +135,7 @@ public class DictionaryBuilder { } stringEndOffsets.append(dataBuffer.size()); hashSlotIndexes.put(str, slotIndex); + cacheReady = false; } return slotIndex; } @@ -141,6 +155,7 @@ public class DictionaryBuilder { } stringEndOffsets.append(dataBuffer.size()); sortedSlotIndexes.insert(index, slotIndex); + cacheReady = false; return slotIndex; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java new file mode 100644 index 0000000..3b4eea0 --- /dev/null +++ 
b/vxquery-core/src/main/java/org/apache/vxquery/datamodel/builders/nodes/UTF8StringBuilder.java @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.vxquery.datamodel.builders.nodes; + +import java.io.DataOutput; +import java.io.IOException; +import java.io.UTFDataFormatException; + +import org.apache.vxquery.runtime.functions.util.FunctionHelper; + +import edu.uci.ics.hyracks.data.std.api.IMutableValueStorage; +import edu.uci.ics.hyracks.data.std.primitive.BytePointable; + +public class UTF8StringBuilder extends AbstractNodeBuilder { + private IMutableValueStorage mvs; + private DataOutput out; + + @Override + public void reset(IMutableValueStorage mvs) throws IOException { + this.mvs = mvs; + out = mvs.getDataOutput(); + out.write(0); + out.write(0); + } + + @Override + public void finish() throws IOException { + int utflen = mvs.getLength() - 2; + BytePointable.setByte(mvs.getByteArray(), 0, (byte) ((utflen >>> 8) & 0xFF)); + BytePointable.setByte(mvs.getByteArray(), 1, (byte) ((utflen >>> 0) & 0xFF)); + } + + public void appendCharArray(char[] ch, int start, int length) throws IOException { + FunctionHelper.writeCharArray(ch, start, length, out); + if (mvs.getLength() > 65535) { + 
throw new UTFDataFormatException("encoded string too long: " + mvs.getLength() + " bytes"); + } + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java index ea65066..dbae9de 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/step/ChildPathStepUnnesting.java @@ -43,6 +43,7 @@ public class ChildPathStepUnnesting extends AbstractForwardAxisPathStep { private final TaggedValuePointable tvpNtp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable(); private final TaggedValuePointable tvpStep = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable(); INodeFilter filter; + int filterLookupID = -1; public ChildPathStepUnnesting(IHyracksTaskContext ctx, PointablePool pp) { super(ctx, pp); @@ -56,9 +57,12 @@ public class ChildPathStepUnnesting extends AbstractForwardAxisPathStep { throw new IllegalArgumentException("Expected int value tag, got: " + args[1].getTag()); } args[1].getValue(ip); - SequenceType sType = dCtx.getStaticContext().lookupSequenceType(ip.getInteger()); - filter = NodeTestFilter.getNodeTestFilter(sType); - + if (ip.getInteger() != filterLookupID) { + filterLookupID = ip.getInteger(); + SequenceType sType = dCtx.getStaticContext().lookupSequenceType(ip.getInteger()); + filter = NodeTestFilter.getNodeTestFilter(sType); + } + if (args[0].getTag() == ValueTag.SEQUENCE_TAG) { args[0].getValue(seqNtp); seqArgsLength = seqNtp.getEntryCount(); 
http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java index 3b60e4e..3a20a86 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/runtime/functions/util/FunctionHelper.java @@ -1260,6 +1260,12 @@ public class FunctionHelper { } } + public static void writeCharArray(char[] ch, int start, int length, DataOutput dOut) { + for (int i = start; i < start + length; ++i) { + writeChar(ch[i], dOut); + } + } + public static void writeDateAsString(IDate date, DataOutput dOut) { // Year writeNumberWithPadding(date.getYear(), 4, dOut); http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java index 37ce001..e40147e 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlparser/SAXContentHandler.java @@ -30,6 +30,7 @@ import org.apache.vxquery.datamodel.builders.nodes.DocumentNodeBuilder; import org.apache.vxquery.datamodel.builders.nodes.ElementNodeBuilder; import org.apache.vxquery.datamodel.builders.nodes.PINodeBuilder; import org.apache.vxquery.datamodel.builders.nodes.TextNodeBuilder; +import org.apache.vxquery.datamodel.builders.nodes.UTF8StringBuilder; import org.apache.vxquery.datamodel.values.ValueTag; import 
org.apache.vxquery.types.BuiltinTypeQNames; import org.apache.vxquery.types.ElementType; @@ -59,6 +60,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { private final DocumentNodeBuilder docb; private final PINodeBuilder pinb; private final TextNodeBuilder tnb; + private final UTF8StringBuilder utf8b; private final List<ElementNodeBuilder> enbStack; private final List<ElementNodeBuilder> freeENBList; @@ -78,16 +80,14 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { // Basic tracking and setting variables private final boolean attachTypes; - private final StringBuilder buffer; private final boolean createNodeIds; private int depth; - private final ArrayBackedValueStorage docABVS; - private final ArrayBackedValueStorage elementABVS; + private final ArrayBackedValueStorage resultABVS; private boolean pendingText; private int nodeIdCounter; private final ITreeNodeIdProvider nodeIdProvider; - private final ArrayBackedValueStorage resultABVS; private final ArrayBackedValueStorage tempABVS; + private final ArrayBackedValueStorage textABVS; public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider) { // XML node builders @@ -97,25 +97,24 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { docb = new DocumentNodeBuilder(); pinb = new PINodeBuilder(); tnb = new TextNodeBuilder(); + utf8b = new UTF8StringBuilder(); enbStack = new ArrayList<ElementNodeBuilder>(); freeENBList = new ArrayList<ElementNodeBuilder>(); - + // Element writing and path step variables skipping = true; tvp = (TaggedValuePointable) TaggedValuePointable.FACTORY.createPointable(); - + // Basic tracking and setting variables this.attachTypes = attachTypes; - buffer = new StringBuilder(); createNodeIds = nodeIdProvider != null; depth = 0; - docABVS = new ArrayBackedValueStorage(); - elementABVS = new ArrayBackedValueStorage(); + resultABVS = new ArrayBackedValueStorage(); pendingText = false; 
nodeIdCounter = 0; this.nodeIdProvider = nodeIdProvider; - resultABVS = new ArrayBackedValueStorage(); tempABVS = new ArrayBackedValueStorage(); + textABVS = new ArrayBackedValueStorage(); } public SAXContentHandler(boolean attachTypes, ITreeNodeIdProvider nodeIdProvider, ByteBuffer frame, @@ -141,7 +140,7 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { ElementType eType = (ElementType) nodeType; NameTest nameTest = eType.getNameTest(); childUri[index] = getStringFromBytes(nameTest.getUri()); - childLocalName[index] = getStringFromBytes(nameTest.getLocalName());; + childLocalName[index] = getStringFromBytes(nameTest.getLocalName()); ++index; } } @@ -157,7 +156,12 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { if (skipping) { return; } - buffer.append(ch, start, length); + try { + utf8b.appendCharArray(ch, start, length); + } catch (IOException e) { + e.printStackTrace(); + throw new SAXException(e); + } pendingText = true; } @@ -255,12 +259,18 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { skipping = false; } db.reset(); - docABVS.reset(); + try { + textABVS.reset(); + utf8b.reset(textABVS); + } catch (IOException e) { + throw new SAXException(e); + } if (skipping) { return; } try { - docb.reset(docABVS); + resultABVS.reset(); + docb.reset(resultABVS); if (createNodeIds) { docb.setLocalNodeId(nodeIdCounter++); } @@ -363,15 +373,15 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { try { flushText(); startChildInParent(cnb); - buffer.append(ch, start, length); - tempABVS.reset(); - tempABVS.getDataOutput().writeUTF(buffer.toString()); if (createNodeIds) { cnb.setLocalNodeId(nodeIdCounter++); } - cnb.setValue(tempABVS); + utf8b.appendCharArray(ch, start, length); + utf8b.finish(); + cnb.setValue(textABVS); endChildInParent(cnb); - buffer.delete(0, buffer.length()); + textABVS.reset(); + utf8b.reset(textABVS); } catch (IOException e) { 
e.printStackTrace(); throw new SAXException(e); @@ -381,14 +391,14 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { private void flushText() throws IOException { if (pendingText) { peekENBStackTop().startChild(tnb); - tempABVS.reset(); - tempABVS.getDataOutput().writeUTF(buffer.toString()); if (createNodeIds) { tnb.setLocalNodeId(nodeIdCounter++); } - tnb.setValue(tempABVS); + utf8b.finish(); + tnb.setValue(textABVS); peekENBStackTop().endChild(tnb); - buffer.delete(0, buffer.length()); + textABVS.reset(); + utf8b.reset(textABVS); pendingText = false; } } @@ -418,8 +428,8 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { } public void writeElement() throws IOException { - resultABVS.reset(); - DataOutput out = resultABVS.getDataOutput(); + tempABVS.reset(); + DataOutput out = tempABVS.getDataOutput(); out.write(ValueTag.NODE_TREE_TAG); byte header = NodeTreePointable.HEADER_DICTIONARY_EXISTS_MASK; if (attachTypes) { @@ -432,13 +442,9 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { if (createNodeIds) { out.writeInt(nodeIdProvider.getId()); } - db.write(resultABVS); - if (subElement == null) { - out.write(docABVS.getByteArray(), docABVS.getStartOffset(), docABVS.getLength()); - } else { - out.write(elementABVS.getByteArray(), elementABVS.getStartOffset(), elementABVS.getLength()); - } - tvp.set(resultABVS.getByteArray(), resultABVS.getStartOffset(), resultABVS.getLength()); + db.writeFromCache(tempABVS); + out.write(resultABVS.getByteArray(), resultABVS.getStartOffset(), resultABVS.getLength()); + tvp.set(tempABVS.getByteArray(), tempABVS.getStartOffset(), tempABVS.getLength()); addNodeToTuple(tvp, tupleIndex); skipping = true; } @@ -457,8 +463,8 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { if (createNodeIds) { out.writeInt(nodeIdProvider.getId()); } - db.write(abvs); - out.write(docABVS.getByteArray(), docABVS.getStartOffset(), 
docABVS.getLength()); + db.writeFromCache(abvs); + out.write(resultABVS.getByteArray(), resultABVS.getStartOffset(), resultABVS.getLength()); } private ElementNodeBuilder createENB() { @@ -482,8 +488,8 @@ public class SAXContentHandler implements ContentHandler, LexicalHandler { private void startChildInParent(AbstractNodeBuilder anb, boolean startNewElement) throws IOException { if (startNewElement) { - elementABVS.reset(); - anb.reset(elementABVS); + resultABVS.reset(); + anb.reset(resultABVS); } else if (enbStack.isEmpty()) { docb.startChild(anb); } else { http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java index 44f2179..0e18328 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java @@ -94,7 +94,7 @@ public class XMLQueryCompiler { } public XMLQueryCompiler(XQueryCompilationListener listener, String[] nodeList, int frameSize, - int availableProcessors, int joinHashSize) { + int availableProcessors, long joinHashSize) { this.listener = listener == null ? 
NoopXQueryCompilationListener.INSTANCE : listener; this.frameSize = frameSize; this.nodeList = nodeList; @@ -113,8 +113,12 @@ public class XMLQueryCompiler { }); builder.getPhysicalOptimizationConfig().setFrameSize(this.frameSize); if (joinHashSize > 0) { - builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinHashSize/this.frameSize); + builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash((int) (joinHashSize / this.frameSize)); } + + builder.getPhysicalOptimizationConfig().setMaxFramesLeftInputHybridHash( + (int) (60L * 1024 * 1048576 / this.frameSize)); + builder.setLogicalRewrites(buildDefaultLogicalRewrites()); builder.setPhysicalRewrites(buildDefaultPhysicalRewrites()); builder.setSerializerDeserializerProvider(new ISerializerDeserializerProvider() { http://git-wip-us.apache.org/repos/asf/vxquery/blob/b665db73/vxquery-server/pom.xml ---------------------------------------------------------------------- diff --git a/vxquery-server/pom.xml b/vxquery-server/pom.xml index 6c99712..dfd3ab9 100644 --- a/vxquery-server/pom.xml +++ b/vxquery-server/pom.xml @@ -14,153 +14,172 @@ See the License for the specific language governing permissions and limitations under the License. 
--> -<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> - <modelVersion>4.0.0</modelVersion> +<project + xmlns="http://maven.apache.org/POM/4.0.0" + xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" + xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> - <parent> - <groupId>org.apache.vxquery</groupId> - <artifactId>apache-vxquery</artifactId> - <version>0.5-SNAPSHOT</version> - </parent> + <parent> + <groupId>org.apache.vxquery</groupId> + <artifactId>apache-vxquery</artifactId> + <version>0.5-SNAPSHOT</version> + </parent> - <artifactId>apache-vxquery-server</artifactId> - <packaging>jar</packaging> - <name>VXQuery Server</name> - <description>Apache VXQuery Server</description> + <artifactId>apache-vxquery-server</artifactId> + <packaging>jar</packaging> + <name>VXQuery Server</name> + <description>Apache VXQuery Server</description> - <distributionManagement> - <site> - <id>vxquery.website</id> - <name>VXQuery Website</name> - <url>file:../site/vxquery-server/</url> - </site> - </distributionManagement> + <distributionManagement> + <site> + <id>vxquery.website</id> + <name>VXQuery Website</name> + <url>file:../site/vxquery-server/</url> + </site> + </distributionManagement> - <build> - <plugins> - <plugin> - <groupId>org.codehaus.mojo</groupId> - <artifactId>appassembler-maven-plugin</artifactId> - <version>1.1.1</version> - <executions> - <execution> - <configuration> - <programs> - <program> - <mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass> - <name>vxqueryshutdown</name> - </program> - <program> - <mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass> - <name>vxquerycc</name> - </program> - <program> - <mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass> - <name>vxquerync</name> - 
</program> - </programs> - <repositoryLayout>flat</repositoryLayout> - <repositoryName>lib</repositoryName> - </configuration> - <phase>package</phase> - <goals> - <goal>assemble</goal> - </goals> - </execution> - </executions> - </plugin> - <plugin> - <artifactId>maven-resources-plugin</artifactId> - <version>2.5</version> - <executions> - <execution> - <id>copy-scripts</id> - <!-- here the phase you need --> - <phase>package</phase> - <goals> - <goal>copy-resources</goal> - </goals> - <configuration> - <outputDirectory>target/appassembler/bin</outputDirectory> - <resources> - <resource> - <directory>src/main/resources/scripts</directory> - </resource> - </resources> - </configuration> - </execution> - </executions> - </plugin> - <plugin> - <artifactId>maven-assembly-plugin</artifactId> - <version>2.4</version> - <executions> - <execution> - <configuration> - <descriptors> - <descriptor>src/main/assembly/binary-assembly.xml</descriptor> - </descriptors> - </configuration> - <phase>package</phase> - <goals> - <goal>attached</goal> - </goals> - </execution> - </executions> - </plugin> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-site-plugin</artifactId> - </plugin> - <plugin> - <artifactId>maven-antrun-plugin</artifactId> - <executions> - <execution> - <id>generate-site</id> - <phase>none</phase> - </execution> - </executions> - </plugin> - </plugins> - </build> - - <dependencies> - <dependency> - <groupId>org.apache.vxquery</groupId> - <artifactId>apache-vxquery-core</artifactId> - <version>0.5-SNAPSHOT</version> - </dependency> + <build> + <plugins> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>appassembler-maven-plugin</artifactId> + <version>1.1.1</version> + <executions> + <execution> + <configuration> + <programs> + <program> + <mainClass>org.apache.vxquery.cli.VXQueryClusterShutdown</mainClass> + <name>vxqueryshutdown</name> + </program> + <program> + 
<mainClass>edu.uci.ics.hyracks.control.cc.CCDriver</mainClass> + <name>vxquerycc</name> + </program> + <program> + <mainClass>edu.uci.ics.hyracks.control.nc.NCDriver</mainClass> + <name>vxquerync</name> + </program> + </programs> + <repositoryLayout>flat</repositoryLayout> + <repositoryName>lib</repositoryName> + </configuration> + <phase>package</phase> + <goals> + <goal>assemble</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <artifactId>maven-resources-plugin</artifactId> + <version>2.5</version> + <executions> + <execution> + <id>copy-scripts</id> + <!-- here the phase you need --> + <phase>package</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>target/appassembler/bin</outputDirectory> + <resources> + <resource> + <directory>src/main/resources/scripts</directory> + </resource> + </resources> + </configuration> + </execution> + <execution> + <id>copy-conf</id> + <!-- here the phase you need --> + <phase>package</phase> + <goals> + <goal>copy-resources</goal> + </goals> + <configuration> + <outputDirectory>target/appassembler/conf</outputDirectory> + <resources> + <resource> + <directory>src/main/resources/conf</directory> + </resource> + </resources> + </configuration> + </execution> + </executions> + </plugin> + <plugin> + <artifactId>maven-assembly-plugin</artifactId> + <version>2.4</version> + <executions> + <execution> + <configuration> + <descriptors> + <descriptor>src/main/assembly/binary-assembly.xml</descriptor> + </descriptors> + </configuration> + <phase>package</phase> + <goals> + <goal>attached</goal> + </goals> + </execution> + </executions> + </plugin> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-site-plugin</artifactId> + </plugin> + <plugin> + <artifactId>maven-antrun-plugin</artifactId> + <executions> + <execution> + <id>generate-site</id> + <phase>none</phase> + </execution> + </executions> + </plugin> + </plugins> + </build> - 
<dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-control-cc</artifactId> - </dependency> - - <dependency> - <groupId>edu.uci.ics.hyracks</groupId> - <artifactId>hyracks-control-nc</artifactId> - </dependency> - </dependencies> + <dependencies> + <dependency> + <groupId>org.apache.vxquery</groupId> + <artifactId>apache-vxquery-core</artifactId> + <version>0.5-SNAPSHOT</version> + </dependency> - <reporting> - <plugins> - <plugin> - <groupId>org.apache.maven.plugins</groupId> - <artifactId>maven-project-info-reports-plugin</artifactId> - <reportSets> - <reportSet> - <reports> - <report>index</report> - <report>dependencies</report> - <report>plugins</report> - </reports> - </reportSet> - </reportSets> - <configuration> - <linkOnly>true</linkOnly> - <dependencyLocationsEnabled>false</dependencyLocationsEnabled> - </configuration> - </plugin> - </plugins> - </reporting> + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-control-cc</artifactId> + </dependency> + + <dependency> + <groupId>edu.uci.ics.hyracks</groupId> + <artifactId>hyracks-control-nc</artifactId> + </dependency> + </dependencies> + + <reporting> + <plugins> + <plugin> + <groupId>org.apache.maven.plugins</groupId> + <artifactId>maven-project-info-reports-plugin</artifactId> + <reportSets> + <reportSet> + <reports> + <report>index</report> + <report>dependencies</report> + <report>plugins</report> + </reports> + </reportSet> + </reportSets> + <configuration> + <linkOnly>true</linkOnly> + <dependencyLocationsEnabled>false</dependencyLocationsEnabled> + </configuration> + </plugin> + </plugins> + </reporting> </project>
