Adding join count queries (q04-q07, each wrapping its join in fn:count) to the benchmark's utility query list to help identify the data.
Project: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/commit/fe6e05b4 Tree: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/tree/fe6e05b4 Diff: http://git-wip-us.apache.org/repos/asf/incubator-vxquery/diff/fe6e05b4 Branch: refs/heads/master Commit: fe6e05b4ecce98202c804010c05c0328dbf272f6 Parents: 9e23608 Author: Preston Carman <[email protected]> Authored: Tue Apr 8 22:02:41 2014 -0700 Committer: Preston Carman <[email protected]> Committed: Thu May 8 14:15:34 2014 -0700 ---------------------------------------------------------------------- .../noaa-ghcn-daily/queries/q04_join_count.xq | 32 +++++++++++++++++++ .../noaa-ghcn-daily/queries/q05_join_count.xq | 33 ++++++++++++++++++++ .../noaa-ghcn-daily/queries/q06_join_count.xq | 32 +++++++++++++++++++ .../noaa-ghcn-daily/queries/q07_join_count.xq | 33 ++++++++++++++++++++ .../scripts/weather_benchmark.py | 28 +++++++++++++++-- 5 files changed, 156 insertions(+), 2 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fe6e05b4/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_join_count.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_join_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_join_count.xq new file mode 100644 index 0000000..c63deec --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q04_join_count.xq @@ -0,0 +1,32 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. 
The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Join Query :) +(: Find all the weather readings for Washington state for a specific day :) +(: 1976/7/4. :) +fn:count( + let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" + for $s in collection($station_collection)/stationCollection/station + + let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r in collection($sensor_collection)/dataCollection/data + + where $s/id eq $r/station + and (some $x in $s/locationLabels satisfies ($x/type eq "ST" and fn:upper-case(fn:data($x/displayName)) eq "WASHINGTON")) + and xs:dateTime(fn:data($r/date)) eq xs:dateTime("1976-07-04T00:00:00.000") + return $r +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fe6e05b4/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_join_count.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_join_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_join_count.xq new file mode 100644 index 0000000..0023cef --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q05_join_count.xq @@ -0,0 +1,33 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Join Aggregate Query :) +(: Find the lowest recorded temperature (TMIN) in the United States for :) +(: 2001. :) +fn:count( + let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" + for $s in collection($station_collection)/stationCollection/station + + let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r in collection($sensor_collection)/dataCollection/data + + where $s/id eq $r/station + and (some $x in $s/locationLabels satisfies ($x/type eq "CNTRY" and $x/id eq "FIPS:US")) + and $r/dataType eq "TMIN" + and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2001 + return $r/value +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fe6e05b4/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_join_count.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_join_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_join_count.xq new file mode 100644 index 0000000..85db07c --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q06_join_count.xq @@ -0,0 +1,32 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor 
license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Join Query :) +(: Find the highest recorded temperature (TMAX) for each station for each :) +(: day over the year 2000. :) +fn:count( + let $station_collection := "/tmp/1.0_partition_ghcnd_all_xml/stations" + for $s in collection($station_collection)/stationCollection/station + + let $sensor_collection := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r in collection($sensor_collection)/dataCollection/data + + where $s/id eq $r/station + and $r/dataType eq "TMAX" + and fn:year-from-dateTime(xs:dateTime(fn:data($r/date))) eq 2000 + return ($s/displayName, $r/date, $r/value) +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fe6e05b4/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_join_count.xq ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_join_count.xq b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_join_count.xq new file mode 100644 index 0000000..24b8340 --- /dev/null +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/queries/q07_join_count.xq @@ -0,0 +1,33 @@ +(: Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. 
See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. :) + +(: XQuery Self Join Query :) +(: Self join with all stations finding the difference in min and max :) +(: temperature and get the average. :) +fn:count( + let $sensor_collection_min := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_min in collection($sensor_collection_min)/dataCollection/data + + let $sensor_collection_max := "/tmp/1.0_partition_ghcnd_all_xml/sensors" + for $r_max in collection($sensor_collection_max)/dataCollection/data + + where $r_min/station eq $r_max/station + and $r_min/date eq $r_max/date + and $r_min/dataType eq "TMIN" + and $r_max/dataType eq "TMAX" + return $r_max/value - $r_min/value +) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-vxquery/blob/fe6e05b4/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py index 081f80a..2e04764 100644 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/weather_benchmark.py @@ -34,8 +34,32 @@ class WeatherBenchmark: 
QUERY_REPLACEMENT_KEY = "/tmp/1.0_partition_ghcnd_all_xml/" QUERY_MASTER_FOLDER = "../queries/" - QUERY_FILE_LIST = ["q00.xq", "q01.xq", "q02.xq", "q03.xq", "q04.xq", "q05.xq", "q06.xq", "q07.xq"] - QUERY_UTILITY_LIST = ["sensor_count.xq", "station_count.xq", "q04_sensor.xq", "q04_station.xq", "q05_sensor.xq", "q05_station.xq", "q06_sensor.xq", "q06_station.xq", "q07_tmin.xq", "q07_tmax.xq"] + QUERY_FILE_LIST = [ + "q00.xq", + "q01.xq", + "q02.xq", + "q03.xq", + "q04.xq", + "q05.xq", + "q06.xq", + "q07.xq" + ] + QUERY_UTILITY_LIST = [ + "sensor_count.xq", + "station_count.xq", + "q04_join_count.xq", + "q04_sensor.xq", + "q04_station.xq", + "q05_join_count.xq", + "q05_sensor.xq", + "q05_station.xq", + "q06_join_count.xq", + "q06_sensor.xq", + "q06_station.xq", + "q07_join_count.xq", + "q07_tmin.xq", + "q07_tmax.xq" + ] BENCHMARK_LOCAL_TESTS = ["local_speed_up", "local_batch_scale_out"] BENCHMARK_CLUSTER_TESTS = ["speed_up", "batch_scale_out"] QUERY_COLLECTIONS = ["sensors", "stations"]
