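Continuing to tweak the MRQL scripts. The MRQL benchmark scripts (load_node_file.sh, run_group_test.sh, run_mrql_tests.sh) now take the data set name as an argument instead of hard-coding "all". This commit also removes redundant case labels that fell through to the default branch in the rewrite-rule classes (RemoveUnusedSortDistinctNodesRule, CardinalityRuleToolbox, OperatorToolbox), replaces the setInMemHashJoinTableSize call with setMaxFramesHybridHash in XMLQueryCompiler, adds a comment to the fn:collection entry in builtin-functions.xml, and adjusts whitespace in the cluster configuration XML files.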
Project: http://git-wip-us.apache.org/repos/asf/vxquery/repo Commit: http://git-wip-us.apache.org/repos/asf/vxquery/commit/9e0133ad Tree: http://git-wip-us.apache.org/repos/asf/vxquery/tree/9e0133ad Diff: http://git-wip-us.apache.org/repos/asf/vxquery/diff/9e0133ad Branch: refs/heads/master Commit: 9e0133adc899f580d89a47765da92b53c6d3ee17 Parents: 7f06298 Author: Preston Carman <[email protected]> Authored: Tue Oct 21 11:08:28 2014 -0700 Committer: Preston Carman <[email protected]> Committed: Tue Oct 21 11:08:28 2014 -0700 ---------------------------------------------------------------------- .../mrql_scripts/load_node_file.sh | 29 +++++--- .../mrql_scripts/run_group_test.sh | 25 ++++--- .../mrql_scripts/run_mrql_tests.sh | 2 +- .../noaa-ghcn-daily/scripts/run_benchmark.sh | 2 +- .../RemoveUnusedSortDistinctNodesRule.java | 19 +----- .../rules/util/CardinalityRuleToolbox.java | 13 ---- .../rewriter/rules/util/OperatorToolbox.java | 72 -------------------- .../vxquery/functions/builtin-functions.xml | 1 + .../xmlquery/query/XMLQueryCompiler.java | 2 +- .../src/main/resources/conf/cluster_example.xml | 12 ++-- .../src/main/resources/conf/local.xml | 18 ++--- 11 files changed, 57 insertions(+), 138 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh index ead0902..206c38b 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh @@ -19,20 +19,29 @@ 
if [ -z "${1}" ] then - echo "Please enter the node number." + echo "Please enter the data set as the first argument." exit fi -echo "Loading node ${1} data file in to cluster." +if [ -z "${2}" ] +then + echo "Please enter the node number as the second argument." + exit +fi + +DATASET=${1} +NODES=${2} + +echo "Loading ${NODES} node ${DATASET} data file in to cluster." # Add each sensor block -cp saved/backups/mr/all_sensors_${1}.xml.gz disk1/hadoop/ -gunzip disk1/hadoop/all_sensors_${1}.xml.gz -hadoop fs -copyFromLocal disk1/hadoop/all_sensors_${1}.xml all/sensors -rm -f disk1/hadoop/all_sensors_${1}.xml +cp saved/backups/mr/${DATASET}_sensors_${NODES}.xml.gz disk1/hadoop/ +gunzip disk1/hadoop/${DATASET}_sensors_${NODES}.xml.gz +hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_sensors_${NODES}.xml ${DATASET}/sensors +rm -f disk1/hadoop/${DATASET}_sensors_${NODES}.xml # Add each station block -cp saved/backups/mr/all_stations_${1}.xml.gz disk1/hadoop/ -gunzip disk1/hadoop/all_stations_${1}.xml.gz -hadoop fs -copyFromLocal disk1/hadoop/all_stations_${1}.xml all/stations -rm -f disk1/hadoop/all_stations_${1}.xml +cp saved/backups/mr/${DATASET}_stations_${NODES}.xml.gz disk1/hadoop/ +gunzip disk1/hadoop/${DATASET}_stations_${NODES}.xml.gz +hadoop fs -copyFromLocal disk1/hadoop/${DATASET}_stations_${NODES}.xml ${DATASET}/stations +rm -f disk1/hadoop/${DATASET}_stations_${NODES}.xml http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh index f42a451..0208beb 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh +++ 
b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_group_test.sh @@ -19,11 +19,18 @@ if [ -z "${1}" ] then - echo "Please enter the number of nodes." + echo "Please enter the data set as the first argument." exit fi -NODES=${1} +if [ -z "${2}" ] +then + echo "Please enter the node number as the second argument." + exit +fi + +DATASET=${1} +NODES=${2} REPEAT=1 # Start Hadoop @@ -32,24 +39,26 @@ sh saved/hadoop/hadoop-1.2.1/bin/start-all.sh sleep 10 # Prepare hadoop file system -hadoop fs -mkdir all +hadoop fs -mkdir ${DATASET} hadoop fs -ls -hadoop fs -mkdir all/sensors -hadoop fs -mkdir all/stations -hadoop fs -ls all +hadoop fs -mkdir ${DATASET}/sensors +hadoop fs -mkdir ${DATASET}/stations +hadoop fs -ls ${DATASET} + +hadoop balancer # Upload test data COUNTER=0 while [ ${COUNTER} -lt ${NODES} ]; do - sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${COUNTER} + sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/load_node_file.sh ${DATASET} ${COUNTER} let COUNTER=COUNTER+1 done # Start test -sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} +sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql/ ${NODES} ${REPEAT} ${DATASET} # Stop Hadoop http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh index 
1e512e1..d6bc9ab 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/other_systems/mrql_scripts/run_mrql_tests.sh @@ -22,7 +22,7 @@ NODES=${2} REPEAT=${3} -DATASET="all" +DATASET=${4} # Make log folder http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh ---------------------------------------------------------------------- diff --git a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh index 88339bd..5146586 100755 --- a/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh +++ b/vxquery-benchmark/src/main/resources/noaa-ghcn-daily/scripts/run_benchmark.sh @@ -52,7 +52,7 @@ do echo "\nBuffer Size: ${BUFFER_SIZE}" >> ${log_base_path}/${log_file} echo "\nFrame Size: ${FRAME_SIZE}" >> ${log_base_path}/${log_file} echo "\nJoin Hash Size: ${JOIN_HASH_SIZE}" >> ${log_base_path}/${log_file} - fi; + fi; done if which programname >/dev/null; http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java index 43d636b..43e2603 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/RemoveUnusedSortDistinctNodesRule.java @@ -384,8 +384,8 @@ public class RemoveUnusedSortDistinctNodesRule implements 
IAlgebraicRewriteRule // Find the last operator to set a variable and call this function again. SubplanOperator subplan = (SubplanOperator) op; for (int index = 0; index < subplan.getNestedPlans().size(); index++) { - AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans().get(index) - .getRoots().get(0).getValue(); + AbstractLogicalOperator lastOperator = (AbstractLogicalOperator) subplan.getNestedPlans() + .get(index).getRoots().get(0).getValue(); updateVariableMap(lastOperator, cardinalityVariable, documentOrderVariables, uniqueNodesVariables, vxqueryContext); } @@ -437,21 +437,6 @@ public class RemoveUnusedSortDistinctNodesRule implements IAlgebraicRewriteRule break; // The following operators' analysis has not yet been implemented. - case CLUSTER: - case DISTINCT: - case EXTENSION_OPERATOR: - case GROUP: - case INDEX_INSERT_DELETE: - case INSERT_DELETE: - case LIMIT: - case PARTITIONINGSPLIT: - case REPLICATE: - case RUNNINGAGGREGATE: - case SCRIPT: - case SINK: - case UNIONALL: - case UNNEST_MAP: - case UPDATE: default: throw new RuntimeException("Operator (" + op.getOperatorTag() + ") has not been implemented in rewrite rule."); http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java index a586c06..5b4594e 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/CardinalityRuleToolbox.java @@ -81,19 +81,6 @@ public class CardinalityRuleToolbox { break; // The following operators' analysis has 
not yet been implemented. - case CLUSTER: - case DISTINCT: - case EXTENSION_OPERATOR: - case INDEX_INSERT_DELETE: - case INSERT_DELETE: - case PARTITIONINGSPLIT: - case REPLICATE: - case RUNNINGAGGREGATE: - case SCRIPT: - case SINK: - case UNIONALL: - case UNNEST_MAP: - case UPDATE: default: throw new RuntimeException("Operator (" + op.getOperatorTag() + ") has not been implemented in rewrite rule."); http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java index da85f2d..725a082 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/compiler/rewriter/rules/util/OperatorToolbox.java @@ -84,29 +84,6 @@ public class OperatorToolbox { AbstractUnnestOperator auo = (AbstractUnnestOperator) op; result.add(auo.getExpressionRef()); break; - case CLUSTER: - case DATASOURCESCAN: - case DISTINCT: - case DISTRIBUTE_RESULT: - case EMPTYTUPLESOURCE: - case EXCHANGE: - case EXTENSION_OPERATOR: - case GROUP: - case INDEX_INSERT_DELETE: - case INSERT_DELETE: - case LIMIT: - case NESTEDTUPLESOURCE: - case ORDER: - case PARTITIONINGSPLIT: - case PROJECT: - case REPLICATE: - case SCRIPT: - case SINK: - case SUBPLAN: - case UNIONALL: - case UPDATE: - case WRITE: - case WRITE_RESULT: default: // TODO Not yet implemented. 
break; @@ -129,32 +106,6 @@ public class OperatorToolbox { case UNNEST_MAP: AbstractUnnestOperator ano = (AbstractUnnestOperator) op; return ano.getExpressionRef(); - case CLUSTER: - case DATASOURCESCAN: - case DISTINCT: - case DISTRIBUTE_RESULT: - case EMPTYTUPLESOURCE: - case EXCHANGE: - case EXTENSION_OPERATOR: - case GROUP: - case INDEX_INSERT_DELETE: - case INNERJOIN: - case INSERT_DELETE: - case LEFTOUTERJOIN: - case LIMIT: - case NESTEDTUPLESOURCE: - case ORDER: - case PARTITIONINGSPLIT: - case PROJECT: - case REPLICATE: - case SCRIPT: - case SELECT: - case SINK: - case SUBPLAN: - case UNIONALL: - case UPDATE: - case WRITE: - case WRITE_RESULT: default: // TODO Not yet implemented. break; @@ -196,29 +147,6 @@ public class OperatorToolbox { case EMPTYTUPLESOURCE: case NESTEDTUPLESOURCE: return null; - case CLUSTER: - case DISTINCT: - case DISTRIBUTE_RESULT: - case EXCHANGE: - case EXTENSION_OPERATOR: - case GROUP: - case INDEX_INSERT_DELETE: - case INNERJOIN: - case INSERT_DELETE: - case LEFTOUTERJOIN: - case LIMIT: - case ORDER: - case PARTITIONINGSPLIT: - case PROJECT: - case REPLICATE: - case SCRIPT: - case SELECT: - case SINK: - case SUBPLAN: - case UNIONALL: - case UPDATE: - case WRITE: - case WRITE_RESULT: default: // Skip operators and go look at input. 
for (Mutable<ILogicalOperator> input : op.getInputs()) { http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml index b439a83..38f03a4 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml +++ b/vxquery-core/src/main/java/org/apache/vxquery/functions/builtin-functions.xml @@ -125,6 +125,7 @@ <function name="fn:collection"> <param name="arg" type="xs:string?"/> <return type="node()*"/> + <!-- Collection operator is added during the rewrite rules phase. --> </function> <!-- fn:compare($comparand1 as xs:string?, $comparand2 as xs:string?) as xs:integer? --> http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java ---------------------------------------------------------------------- diff --git a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java index 966bd87..3cdc492 100644 --- a/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java +++ b/vxquery-core/src/main/java/org/apache/vxquery/xmlquery/query/XMLQueryCompiler.java @@ -113,7 +113,7 @@ public class XMLQueryCompiler { }); builder.getPhysicalOptimizationConfig().setFrameSize(this.frameSize); if (joinHashSize > 0) { - builder.getPhysicalOptimizationConfig().setInMemHashJoinTableSize(joinHashSize); + builder.getPhysicalOptimizationConfig().setMaxFramesHybridHash(joinHashSize); } builder.setLogicalRewrites(buildDefaultLogicalRewrites()); builder.setPhysicalRewrites(buildDefaultPhysicalRewrites()); 
http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/cluster_example.xml ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/conf/cluster_example.xml b/vxquery-server/src/main/resources/conf/cluster_example.xml index 41d642d..18d9173 100644 --- a/vxquery-server/src/main/resources/conf/cluster_example.xml +++ b/vxquery-server/src/main/resources/conf/cluster_example.xml @@ -15,13 +15,13 @@ limitations under the License. --> <cluster xmlns="cluster"> - <name>local</name> + <name>local</name> <username>joe</username> - <master_node> - <id>master</id> - <client_ip>128.195.52.177</client_ip> - <cluster_ip>192.168.100.0</cluster_ip> - </master_node> + <master_node> + <id>master</id> + <client_ip>128.195.52.177</client_ip> + <cluster_ip>192.168.100.0</cluster_ip> + </master_node> <node> <id>nodeA</id> <cluster_ip>192.168.100.1</cluster_ip> http://git-wip-us.apache.org/repos/asf/vxquery/blob/9e0133ad/vxquery-server/src/main/resources/conf/local.xml ---------------------------------------------------------------------- diff --git a/vxquery-server/src/main/resources/conf/local.xml b/vxquery-server/src/main/resources/conf/local.xml index 5c27e11..4a48620 100644 --- a/vxquery-server/src/main/resources/conf/local.xml +++ b/vxquery-server/src/main/resources/conf/local.xml @@ -15,14 +15,14 @@ limitations under the License. --> <cluster xmlns="cluster"> - <name>local</name> - <master_node> - <id>master</id> - <client_ip>127.0.0.1</client_ip> - <cluster_ip>127.0.0.1</cluster_ip> - </master_node> - <node> - <id>node1</id> - <cluster_ip>127.0.0.1</cluster_ip> + <name>local</name> + <master_node> + <id>master</id> + <client_ip>127.0.0.1</client_ip> + <cluster_ip>127.0.0.1</cluster_ip> + </master_node> + <node> + <id>node1</id> + <cluster_ip>127.0.0.1</cluster_ip> </node> </cluster>
