drill git commit: DRILL-1325: Throw UnsupportedRelOperatorException for unequal joins, implicit cross joins

2015-02-25 Thread amansinha
Repository: drill
Updated Branches:
  refs/heads/master 471013836 -> d72d6030e


DRILL-1325: Throw UnsupportedRelOperatorException for unequal joins, implicit cross joins


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d72d6030
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d72d6030
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d72d6030

Branch: refs/heads/master
Commit: d72d6030ed3961a5e4fa8839b4be5ec1065f4059
Parents: 4710138
Author: Hsuan-Yi Chu 
Authored: Tue Feb 24 19:08:40 2015 -0800
Committer: Hsuan-Yi Chu 
Committed: Wed Feb 25 17:49:26 2015 -0800

--
 .../exec/physical/impl/join/JoinUtils.java  | 42 +
 .../planner/sql/handlers/DefaultSqlHandler.java | 30 --
 .../work/foreman/SqlUnsupportedException.java   |  4 +
 .../foreman/UnsupportedDataTypeException.java   |  4 +
 .../foreman/UnsupportedFunctionException.java   |  6 +-
 .../UnsupportedRelOperatorException.java|  4 +
 .../apache/drill/TestDisabledFunctionality.java | 97 
 7 files changed, 177 insertions(+), 10 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/drill/blob/d72d6030/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java
--
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java
index 04f3bbe..b94289c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/join/JoinUtils.java
@@ -19,6 +19,11 @@
 package org.apache.drill.exec.physical.impl.join;
 
 import org.apache.drill.common.logical.data.JoinCondition;
+import org.eigenbase.rel.JoinRelBase;
+import org.eigenbase.rel.RelNode;
+import org.eigenbase.relopt.RelOptUtil;
+
+import java.util.List;
 
 public class JoinUtils {
   public static enum JoinComparator {
@@ -51,4 +56,41 @@ public class JoinUtils {
    throw new IllegalArgumentException("Invalid comparator supplied to this join.");
  }
 
+  /**
+   * Check if the given RelNode contains any Cartesian join.
+   * Return true if one is found; otherwise, return false.
+   *
+   * @param relNode   the RelNode to be inspected.
+   * @param leftKeys  a list used for the left input into the join which has
+   *                  equi-join keys. It can be empty or not (but not null);
+   *                  this method will clear this list before using it.
+   * @param rightKeys a list used for the right input into the join which has
+   *                  equi-join keys. It can be empty or not (but not null);
+   *                  this method will clear this list before using it.
+   * @return          true if the given relNode contains a Cartesian join;
+   *                  otherwise, false
+   */
+  public static boolean checkCartesianJoin(RelNode relNode, List<Integer> leftKeys, List<Integer> rightKeys) {
+    if (relNode instanceof JoinRelBase) {
+      leftKeys.clear();
+      rightKeys.clear();
+
+      JoinRelBase joinRel = (JoinRelBase) relNode;
+      RelNode left = joinRel.getLeft();
+      RelNode right = joinRel.getRight();
+
+      RelOptUtil.splitJoinCondition(left, right, joinRel.getCondition(), leftKeys, rightKeys);
+      if (leftKeys.isEmpty() || rightKeys.isEmpty()) {
+        return true;
+      }
+    }
+
+    for (RelNode child : relNode.getInputs()) {
+      if (checkCartesianJoin(child, leftKeys, rightKeys)) {
+        return true;
+      }
+    }
+
+    return false;
+  }
 }
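
For context, a sketch of how a planner-side caller might consult this check and reject the query; this caller is hypothetical (the real wiring lives in DefaultSqlHandler, and the exact UnsupportedRelOperatorException constructor is not shown in this excerpt):

    import java.util.ArrayList;
    import java.util.List;

    import org.eigenbase.rel.RelNode;

    // Hypothetical caller -- illustrative only, not part of this commit.
    public class CartesianJoinCheck {
      public static void validateNoCartesianJoin(RelNode root) {
        List<Integer> leftKeys = new ArrayList<>();
        List<Integer> rightKeys = new ArrayList<>();
        if (JoinUtils.checkCartesianJoin(root, leftKeys, rightKeys)) {
          // The real handler surfaces UnsupportedRelOperatorException here;
          // a standard exception stands in for it in this sketch.
          throw new UnsupportedOperationException(
              "This query cannot be planned: implicit cross join or inequality join");
        }
      }
    }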

http://git-wip-us.apache.org/repos/asf/drill/blob/d72d6030/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
--
diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
index 0ac7c97..35e7f5c 100644
--- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
+++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/DefaultSqlHandler.java
@@ -18,6 +18,7 @@
 package org.apache.drill.exec.planner.sql.handlers;
 
 import java.io.IOException;
+import java.util.ArrayList;
 import java.util.Collection;
 import java.util.List;
 
@@ -35,6 +36,7 @@ import org.apache.drill.exec.ops.QueryContext;
 import org.apache.drill.exec.physical.PhysicalPlan;
 import org.apache.drill.exec.physical.base.AbstractPhysicalVisitor;
 import org.apache.drill.exec.physical.base.PhysicalOperator;
+import org.apache.

drill git commit: DRILL-1378: Ctrl-C to cancel a query that has not returned with the first result set.

2015-02-25 Thread parthc
Repository: drill
Updated Branches:
  refs/heads/master f7ef5ec78 -> 471013836


DRILL-1378: Ctrl-C to cancel a query that has not returned with the first result set.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/47101383
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/47101383
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/47101383

Branch: refs/heads/master
Commit: 471013836419185d51a2d57bf5b89c4087053255
Parents: f7ef5ec
Author: Parth Chandra 
Authored: Wed Feb 25 09:56:12 2015 -0800
Committer: Parth Chandra 
Committed: Wed Feb 25 17:24:54 2015 -0800

--
 .../java/org/apache/drill/jdbc/DrillResultSet.java | 17 +
 1 file changed, 17 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/drill/blob/47101383/exec/jdbc/src/main/java/org/apache/drill/jdbc/DrillResultSet.java
--
diff --git a/exec/jdbc/src/main/java/org/apache/drill/jdbc/DrillResultSet.java b/exec/jdbc/src/main/java/org/apache/drill/jdbc/DrillResultSet.java
index 88a6c6d..77b2c37 100644
--- a/exec/jdbc/src/main/java/org/apache/drill/jdbc/DrillResultSet.java
+++ b/exec/jdbc/src/main/java/org/apache/drill/jdbc/DrillResultSet.java
@@ -76,6 +76,20 @@ public class DrillResultSet extends AvaticaResultSet {
 listener.close();
   }
 
+  @Override
+  public boolean next() throws SQLException {
+    // Next may be called after close has been called (for example after a user cancel),
+    // which in turn sets the cursor to null. So we must check before we call next.
+    // TODO: handle next() after close is called in the Avatica code.
+    if (super.cursor != null) {
+      return super.next();
+    } else {
+      return false;
+    }
+  }
+
+
  @Override protected DrillResultSet execute() throws SQLException{
    // Call driver's callback. It is permitted to throw a RuntimeException.
    DrillConnectionImpl connection = (DrillConnectionImpl) statement.getConnection();
@@ -200,6 +214,9 @@ public class DrillResultSet extends AvaticaResultSet {
   qrb.getData().release();
 }
   }
+  // close may be called before the first result is received and the main thread is blocked
+  // waiting for the result. In that case we want to unblock the main thread.
+  latch.countDown();
   completed = true;
 }
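
To see the scenario this patch addresses from the client side, a hypothetical JDBC snippet; the connection URL and query are placeholders and not part of this commit:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.Statement;

    // Cancel arrives while the main thread is still waiting on the first
    // result batch; with this patch the blocked thread is released by
    // latch.countDown() and the patched next() returns false instead of
    // dereferencing a null cursor.
    public class CancelBeforeFirstBatch {
      public static void main(String[] args) throws Exception {
        Connection conn = DriverManager.getConnection("jdbc:drill:zk=local");
        final Statement stmt = conn.createStatement();

        new Thread(new Runnable() {
          public void run() {
            try {
              Thread.sleep(100);
              stmt.cancel();          // simulates Ctrl-C in sqlline
            } catch (Exception ignored) { }
          }
        }).start();

        ResultSet rs = stmt.executeQuery("SELECT * FROM cp.`employee.json`");
        while (rs.next()) {
          // consume rows; after a cancel this loop simply ends
        }
        conn.close();
      }
    }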
 



svn commit: r1662344 [1/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Author: adi
Date: Thu Feb 26 01:16:43 2015
New Revision: 1662344

URL: http://svn.apache.org/r1662344
Log:
DRILL-2315: Confluence conversion plus fixes (for Kristine Hahn)

Added:
drill/site/trunk/content/drill/README.md
drill/site/trunk/content/drill/docs/advanced-properties/
drill/site/trunk/content/drill/docs/advanced-properties/index.html
drill/site/trunk/content/drill/docs/apache-drill-contribution-ideas/
drill/site/trunk/content/drill/docs/apache-drill-contribution-ideas/index.html
drill/site/trunk/content/drill/docs/configuring-odbc-connections-for-linux-and-mac-os-x/
drill/site/trunk/content/drill/docs/configuring-odbc-connections-for-linux-and-mac-os-x/index.html
drill/site/trunk/content/drill/docs/drill-patch-review-tool/
drill/site/trunk/content/drill/docs/drill-patch-review-tool/index.html
drill/site/trunk/content/drill/docs/driver-configuration-options/
drill/site/trunk/content/drill/docs/driver-configuration-options/index.html
drill/site/trunk/content/drill/docs/img/58.png   (with props)
drill/site/trunk/content/drill/docs/img/BI_to_Drill_2.png   (with props)
drill/site/trunk/content/drill/docs/img/HbaseViewCreation0.png   (with props)
drill/site/trunk/content/drill/docs/img/HbaseViewDSN.png   (with props)
drill/site/trunk/content/drill/docs/img/Hbase_Browse.png   (with props)
drill/site/trunk/content/drill/docs/img/Hive_DSN.png   (with props)
drill/site/trunk/content/drill/docs/img/ODBC_CustomSQL.png   (with props)
drill/site/trunk/content/drill/docs/img/ODBC_HbasePreview2.png   (with props)
drill/site/trunk/content/drill/docs/img/ODBC_HbaseView.png   (with props)
drill/site/trunk/content/drill/docs/img/ODBC_HiveConnection.png   (with props)
drill/site/trunk/content/drill/docs/img/ODBC_to_Drillbit.png   (with props)
drill/site/trunk/content/drill/docs/img/ODBC_to_Quorum.png   (with props)
drill/site/trunk/content/drill/docs/img/Parquet_DSN.png   (with props)
drill/site/trunk/content/drill/docs/img/Parquet_Preview.png   (with props)
drill/site/trunk/content/drill/docs/img/RegionParquet_table.png   (with props)
drill/site/trunk/content/drill/docs/img/SelectHbaseView.png   (with props)
drill/site/trunk/content/drill/docs/img/Untitled.png   (with props)
drill/site/trunk/content/drill/docs/img/VoterContributions_hbaseview.png   (with props)
drill/site/trunk/content/drill/docs/img/ngram_plugin.png   (with props)
drill/site/trunk/content/drill/docs/img/ngram_plugin2.png   (with props)
drill/site/trunk/content/drill/docs/img/settings.png   (with props)
drill/site/trunk/content/drill/docs/img/student_hive.png   (with props)
drill/site/trunk/content/drill/docs/installing-the-mapr-drill-odbc-driver-on-linux/
drill/site/trunk/content/drill/docs/installing-the-mapr-drill-odbc-driver-on-linux/index.html
drill/site/trunk/content/drill/docs/installing-the-mapr-drill-odbc-driver-on-mac-os-x/
drill/site/trunk/content/drill/docs/installing-the-mapr-drill-odbc-driver-on-mac-os-x/index.html
drill/site/trunk/content/drill/docs/odbc-jdbc-interfaces/
drill/site/trunk/content/drill/docs/odbc-jdbc-interfaces/index.html
drill/site/trunk/content/drill/docs/reserved-keywords/
drill/site/trunk/content/drill/docs/reserved-keywords/index.html
drill/site/trunk/content/drill/docs/sql-reference/
drill/site/trunk/content/drill/docs/sql-reference/index.html
drill/site/trunk/content/drill/docs/step-1-install-the-mapr-drill-odbc-driver-on-windows/
drill/site/trunk/content/drill/docs/step-1-install-the-mapr-drill-odbc-driver-on-windows/index.html
drill/site/trunk/content/drill/docs/step-2-configure-odbc-connections-to-drill-data-sources/
drill/site/trunk/content/drill/docs/step-2-configure-odbc-connections-to-drill-data-sources/index.html
drill/site/trunk/content/drill/docs/step-3-connect-to-drill-data-sources-from-a-bi-tool/
drill/site/trunk/content/drill/docs/step-3-connect-to-drill-data-sources-from-a-bi-tool/index.html
drill/site/trunk/content/drill/docs/tableau-examples/
drill/site/trunk/content/drill/docs/tableau-examples/index.html
drill/site/trunk/content/drill/docs/testing-the-odbc-connection-on-linux-and-mac-os-x/
drill/site/trunk/content/drill/docs/testing-the-odbc-connection-on-linux-and-mac-os-x/index.html
drill/site/trunk/content/drill/docs/using-drill-explorer-to-browse-data-and-create-views/
drill/site/trunk/content/drill/docs/using-drill-explorer-to-browse-data-and-create-views/index.html
drill/site/trunk/content/drill/docs/using-the-jdbc-driver/
drill/site/trunk/content/drill/docs/using-the-jdbc-driver/index.html
drill/site/trunk/content/drill/docs/using-the-mapr-odbc-driver-on-linux-and-mac-os-x/
drill/site/trunk/content/drill/docs/using-the-mapr-odbc-driver-on-linux-and-mac-os-x/index.html
drill/site/trunk/content/drill/docs/using-the-mapr-odbc-driver-

svn commit: r1662344 [5/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Modified: drill/site/trunk/content/drill/docs/install-drill/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/install-drill/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/install-drill/index.html (original)
+++ drill/site/trunk/content/drill/docs/install-drill/index.html Thu Feb 26 01:16:43 2015
@@ -74,16 +74,6 @@ clustered Hadoop environment, you can in
 Installing in distributed mode requires some configuration, however once you
 install you can connect Drill to your Hive, HBase, or distributed file system
 data sources and run queries on them.
-
-Click on any of the following links for more information about how to install
-Drill in embedded or distributed mode:
-
-
-Apache Drill in 10 Minutes
-Deploying Apache Drill in a Clustered Environment
-Installing Drill in Embedded Mode
-Installing Drill in Distributed Mode
-
 
 
 

Modified: drill/site/trunk/content/drill/docs/installing-drill-in-distributed-mode/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/installing-drill-in-distributed-mode/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/installing-drill-in-distributed-mode/index.html (original)
+++ drill/site/trunk/content/drill/docs/installing-drill-in-distributed-mode/index.html Thu Feb 26 01:16:43 2015
@@ -106,11 +106,12 @@ tar xzf apache-drill-.tar
 If you are using external JAR files, edit drill-env.sh, located in /opt/drill/conf/, and define HADOOP_HOME:
 export HADOOP_HOME="~/hadoop/hadoop-0.20.2/"
 
-In drill-override.conf,create a unique Drill cluster ID, and provide Zookeeper host names and port numbers to configure a connection to your Zookeeper quorum.
+In drill-override.conf, create a unique Drill cluster ID, and provide Zookeeper host names and port numbers to configure a connection to your Zookeeper quorum.
 
-a. Edit drill-override.conf located in ~/drill/drill-/conf/.
-
-b. Provide a unique cluster-id and the Zookeeper host names and port numbers in zk.connect. If you install Drill on multiple nodes, assign the same cluster ID to each Drill node so that all Drill nodes share the same ID. The default Zookeeper port is 2181.
+
+Edit drill-override.conf located in ~/drill/drill-/conf/.
+Provide a unique cluster-id and the Zookeeper host names and port numbers in zk.connect. If you install Drill on multiple nodes, assign the same cluster ID to each Drill node so that all Drill nodes share the same ID. The default Zookeeper port is 2181.
+
 
 Example
  
 drill.exec:{
@@ -123,7 +124,7 @@ tar xzf apache-drill-.tar
 
 
 
-You can connect Drill to various types of data sources. Refer to https://cwiki.apache.org/confluence/display/DRIL%0AL/Connecting+to+Data+Sources";>Connect
+You can connect Drill to various types of data sources. Refer to Connect
 Apache Drill to Data Sources to get configuration instructions for the
 particular type of data source that you want to connect to Drill.

Modified: drill/site/trunk/content/drill/docs/installing-drill-in-embedded-mode/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/installing-drill-in-embedded-mode/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/installing-drill-in-embedded-mode/index.html (original)
+++ drill/site/trunk/content/drill/docs/installing-drill-in-embedded-mode/index.html Thu Feb 26 01:16:43 2015
@@ -78,22 +78,14 @@ running Linux, Mac OS X, or Windows.
 
 You must have the following software installed on your machine to run Drill:
 
-SoftwareDescriptionhttp://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html"; rel="nofollow">Oracle JDK version 7A set of programming tools for developing Java applications.
-
-A set of programming tools for developing Java applications.  
+SoftwareDescriptionhttp://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html"; rel="nofollow">Oracle JDK version 7A set of programming tools for developing Java applications.
 
 You can run the following command to verify that the system meets the software
 prerequisite:
 
-CommandExample Outputjava –versionjava version "1.7.0_65"Java(TM) SE Runtime Environment (build 1.7.0_65-b19)Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)
-
-Click on the installation link appropriate for your operating system:
+CommandExample Outputjava –versionjava version "1.7.0_65"Java(TM) SE Runtime Environment (build 1.7.0_65-b19)Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)
 
-
-Installing Drill on Linux
-Installing Drill on Mac OS X
-Installing Drill on Windo

svn commit: r1662344 [3/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Modified: drill/site/trunk/content/drill/docs/explain-commands/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/explain-commands/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/explain-commands/index.html (original)
+++ drill/site/trunk/content/drill/docs/explain-commands/index.html Thu Feb 26 01:16:43 2015
@@ -91,7 +91,7 @@ conditions against the same data will re
 change a configuration option, for example, or update the tables or files that
 you are selecting from, you are likely to see plan changes.
 
-EXPLAIN Syntax
+EXPLAIN Syntax
 
 The EXPLAIN command supports the following syntax:
explain plan [ including all attributes ] [ with implementation | without implementation ] for  ;
@@ -108,7 +108,7 @@ physical and logical plans.
These options return the physical and logical plan information, respectively.
 The default is physical (WITH IMPLEMENTATION).
 
-EXPLAIN for Physical Plans
+EXPLAIN for Physical Plans
 
 The EXPLAIN PLAN FOR  command returns the chosen physical execution
 plan for a query statement without running the query. You can use this command
@@ -173,7 +173,7 @@ for submitting the query via Drill APIs.
   },
 
 
-Costing Information
+Costing Information
 
 Add the INCLUDING ALL ATTRIBUTES option to the EXPLAIN command to see cost
 estimates for the query plan. For example:
@@ -192,7 +192,7 @@ select * from dfs.`/Users/brumsby/drill/
 00-04ProducerConsumer: rowcount = 1.0, cumulative cost = {1.0 rows, 1.0 cpu, 0.0 io, 0.0 network}, id = 3106
 00-05  Scan(groupscan=[EasyGroupScan [selectionRoot=/Users/brumsby/drill/donuts.json, columns = null]]): rowcount = 1.0, cumulative cost = {0.0 rows, 0.0 cpu, 0.0 io, 0.0 network}, id = 3101
 
-EXPLAIN for Logical Plans
+EXPLAIN for Logical Plans
 
 To return the logical plan for a query (again, without actually running the
 query), use the EXPLAIN PLAN WITHOUT IMPLEMENTATION syntax:

Modified: drill/site/trunk/content/drill/docs/flatten-function/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/flatten-function/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/flatten-function/index.html (original)
+++ drill/site/trunk/content/drill/docs/flatten-function/index.html Thu Feb 26 01:16:43 2015
@@ -143,7 +143,7 @@ order by count(celltbl.catl) desc limit
 +---|+
 
 A common use case for FLATTEN is its use in conjunction with the
-KVGEN function.
+KVGEN function.
 
 
 

Modified: drill/site/trunk/content/drill/docs/flexibility/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/flexibility/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/flexibility/index.html (original)
+++ drill/site/trunk/content/drill/docs/flexibility/index.html Thu Feb 26 01:16:43 2015
@@ -69,7 +69,7 @@
 
 The following features contribute to 
Drill's flexible architecture:
 
-_Dynamic schema discovery _
+Dynamic schema discovery
 
 Drill does not require schema or type specification for the data in order to
 start the query execution process. Instead, Drill starts processing the data
@@ -121,7 +121,7 @@ traditional DB (Databases->Tables/Vie
 through the ANSI standard INFORMATION_SCHEMA database
 
 For more information on how to configure and work various data sources with
-Drill, refer to https://cwiki.apache.or%0Ag/confluence/display/DRILL/Connect+Apache+Drill+to+Data+Sources";>Connect Apache Drill to Data Sources.
+Drill, refer to Connect Apache Drill to Data Sources.
 
 Extensibility
 

Modified: drill/site/trunk/content/drill/docs/getting-to-know-the-drill-sandbox/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/getting-to-know-the-drill-sandbox/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/getting-to-know-the-drill-sandbox/index.html (original)
+++ drill/site/trunk/content/drill/docs/getting-to-know-the-drill-sandbox/index.html Thu Feb 26 01:16:43 2015
@@ -82,7 +82,7 @@ optimization rules for Drill to leverage
 
 Take a look at the pre-configured storage plugins by opening the Drill Web UI.
 
-Feel free to skip this section and jump directly to the queries: Lesson 1:
+Feel free to skip this section and jump directly to the queries: Lesson 1:
 Learn About the Data Set
 
@@ -97,8 +97,7 @@ Set
 
 A storage plugin configuration for MapR-DB in the sandbox. Drill uses a single
 storage plugin for connecting to HBase as well as MapR-DB, which is an
 enterprise grade in-Hadoop NoSQL databas

svn commit: r1662344 [6/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Modified: drill/site/trunk/content/drill/docs/planning-and-execution-options/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/planning-and-execution-options/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/planning-and-execution-options/index.html (original)
+++ drill/site/trunk/content/drill/docs/planning-and-execution-options/index.html Thu Feb 26 01:16:43 2015
@@ -88,17 +88,17 @@ persist across all sessions.
 The following table contains planning and execution options that you can set
 at the system or session level:
 
-Option nameDefault valueDescriptionexec.errors.verbosefalseThis option enables or disables the verbose message that Drill returns when a query fails. When enabled, Drill provides additional information about failed queries.exec.max_hash_table_size1073741824The default maximum size for hash tables.exec.min_hash_table_size65536The default starting size for hash tables. Increasing this size is useful for very large aggregations or joins when you have large amounts of memory for Drill to use. Drill can spend a lot of time resizing the hash table as it finds new data. If you have large data sets, you can increase this hash table size to increase performance.planner.add_producer_consumerfalse This option enables or disables a secondary reading thread that works out of band of the rest of the scanning fragment to prefetch data from disk. If you interact with a certain type of storage medium that is slow or does not prefetch much data, this option tells Drill to add a producer consumer reading thread to the operation. Drill can then assign one thread that focuses on a single reading fragment. If Drill is using memory, you can disable this option to get better performance. If Drill is using disk space, you should enable this option and set a reasonable queue size for the planner.producer_consumer_queue_size option.planner.broadcast_threshold100Threshold, in terms of a number of rows, that determines whether a broadcast join is chosen for a query. Regardless of the setting of the broadcast_join option (enabled or disabled), a broadcast join is not chosen unless the right side of the join is estimated to contain fewer rows than this threshold. The intent of this option is to avoid broadcasting too many rows for join purposes. Broadcasting involves sending data across nodes and is a network-intensive operation. (The &quot;right side&quot; of the join, which may itself be a join or simply a table, is determined by cost-based optimizations and heuristics during physical planning.)planner.enable_broadcast_joinplanner.enable_hashaggplanner.enable_hashjoinplanner.enable_mergejoinplanner.enable_multiphase_aggplanner.enable_streamaggtrueThese options enable or disable specific aggregation and join operators for queries. These operators are all enabled by default and in general should not be disabled.Hash aggregation and hash join are hash-based operations. Streaming aggregation and merge join are sort-based operations. Both hash-based and sort-based operations consume memory; however, currently, hash-based operations do not spill to disk as needed, but the sort-based operations do. If large hash operations do not fit in memory on your system, you may need to disable these operations. Queries will continue to run, using alternative plans.planner.producer_consumer_queue_size10Determines how much data to prefetch from disk (in record batches) out of band of query execution. The larger the queue size, the greater the amount of memory that the queue and overall query execution consumes.planner.slice_target10The number of records manipulated within a fragment before Drill parallelizes them.planner.width.max_per_node The default depends on the number of cores on each node.In this context &quot;width&quot; refers to fanout or distribution potential: the ability to run a query in parallel across the cores on a node and the nodes on a cluster.A physical plan consists of intermediate operations, known as query &quot;fragments,&quot; that run concurrently, yielding opportunities for parallelism above and below each exchange operator in the plan. An exchange operator represents a breakpoint in the execution flow where processing can be distributed. For example, a single-process scan of a file may flow into an exchange operator, followed by a multi-process aggregation fragment. The maximum width per node defines the maximum degree of parallelism for any fragment of a query, but the setting applies at the level of a single node in the cluster.The default maximum degree of parallelism per node is calculated as follows, with the theoretical maximum automatically scaled back (and rounded down) so that only 70% of the actual available capacity is taken into account:
+Option nameDefault valueDescriptionexec.errors.verbosefalseT

svn commit: r1662344 [8/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Modified: drill/site/trunk/content/drill/docs/supported-date-time-data-type-formats/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/supported-date-time-data-type-formats/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/supported-date-time-data-type-formats/index.html (original)
+++ drill/site/trunk/content/drill/docs/supported-date-time-data-type-formats/index.html Thu Feb 26 01:16:43 2015
@@ -136,14 +136,14 @@ Apache Drill does not support time

   
  
-  
+
 ## Time
 
 Drill supports the `time` data type in the following format:
 
 HH:mm:ss.SSS (hour:minute:sec.milliseconds)
 
-The following table provides some examples for the` time` data type:
+The following table provides some examples for the `time` data type:
 
 
   Use
@@ -162,7 +162,6 @@ The following table provides some exampl
   select 
cast(time_col as time) from dfs.`/tmp/input.json`;
 
 
-  
 
 Interval
 
@@ -174,7 +173,7 @@ The following table provides some exampl
 supports the interval data type in the following format:
 P 
[qty] Y [qty] M
 
-The following table provides examples for interval yeardata type:
+The following table provides examples for interval year data type:
 
 
 Use
@@ -190,7 +189,6 @@ supports the interval data
 select 
cast(col as interval year) from dfs.`/tmp/input.json`;
   

-  
 
 Interval Day
 
@@ -201,15 +199,14 @@ supports the interval day d
 
 The following table provides examples for interval day data 
type:
 
-UseExampleLiteralselect interval '1 10:20:30.123' day to second from dfs.`/tmp/input.json`;select interval '1 10' day to hour from dfs.`/tmp/input.json`;select interval '10' day  from dfs.`/tmp/input.json`;select interval '10' hour  from dfs.`/tmp/input.json`;select interval '10.999' second  from dfs.`/tmp/input.json`;JSON Input{"col" : "P1DT10H20M30S"}{"col" : "P1DT10H20M30.123S"}{"col" : "P1D"}{"col" : "PT10H"}{"col" : "PT10.10S"}{"col" : "PT20S"}{"col" : "PT10H10S"}CAST from VARCHARselect cast(col as interval day) from dfs.`/tmp/input.json`; 
-  
+UseExampleLiteralselect interval '1 10:20:30.123' day to second from dfs.`/tmp/input.json`;select interval '1 10' day to hour from dfs.`/tmp/input.json`;select interval '10' day  from dfs.`/tmp/input.json`;select interval '10' hour  from dfs.`/tmp/input.json`;select interval '10.999' second  from dfs.`/tmp/input.json`;JSON Input{"col" : "P1DT10H20M30S"}{"col" : "P1DT10H20M30.123S"}{"col" : &quot;P1D"}{"col" : "PT10H"}{"col" : "PT10.10S"}{"col" : "PT20S"}{"col" : "PT10H10S"}CAST from VARCHARselect cast(col as interval day) from dfs.`/tmp/input.json`; 
 
 Literal
 
-The following table provides a list ofdate/time literals that Drill
+The following table provides a list of date/time literals that Drill
 supports with examples of each:
 
-FormatInterpretationExampleinterval '1 10:20:30.123' day to second1 day, 10 hours, 20 minutes, 30 seconds, and 123 thousandths of a secondselect interval '1 10:20:30.123' day to second from dfs.`/tmp/input.json`;interval '1 10' day to hour1 day 10 hoursselect interval '1 10' day to hour from dfs.`/tmp/input.json`;interval '10' day10 daysselect interval '10' day from dfs.`/tmp/input.json`;interval '10' hour10 hoursselect interval '10' hour from dfs.`/tmp/input.json`;interval '10.999' second10.999 secondsselect interval '10.999' second from dfs.`/tmp/input.json`; 
+FormatInterpretationExampleinterval '1 10:20:30.123' day to second1 day, 10 hours, 20 minutes, 30 seconds, and 123 thousandths of a secondselect interval '1 10:20:30.123' day to second from dfs.`/tmp/input.json`;interval '1 10' day to hour1 day 10 hoursselect interval '1 10' day to hour from dfs.`/tmp/input.json`;interval '10' day10 daysselect interval '10' day from dfs.`/tmp/input.json`;interval '10' hour10 hoursselect interval '10' hour from dfs.`/tmp/input.json`;interval '10.999' second10.999 secondsselect interval '10.999' second from dfs.`/tmp/input.json`; 
 
 
 

Added: drill/site/trunk/content/drill/docs/tableau-examples/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/tableau-examples/index.html?rev=1662344&view=auto
==============================================================================
--- drill/site/trunk/content/drill/docs/tableau-examples/index.html (added)
+++ drill/site/trunk/content/drill/docs/tableau-examples/index.html Thu Feb 26 01:16:43 2015
@@ -0,0 +1,343 @@
+
+
+
+
+
+
+
+
+Tableau Examples - Apache Drill
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  
+  
+Documentation
+
+  Overview
  https://cwiki.apache.org/confluence/display/DRILL/Apache+Drill+in+10+Minutes"; target="_blank">Drill in 10 Minutes
+  Why Drill? 
+  Architecture
+
+  
+  
+Community
+
+  Team
+  Events and Meetup

svn commit: r1662344 [4/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Modified: drill/site/trunk/content/drill/docs/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/index.html (original)
+++ drill/site/trunk/content/drill/docs/index.html Thu Feb 26 01:16:43 2015
@@ -71,7 +71,7 @@
 
 
   
-Apache Drill Documentation
+Architectural Overview
 
 
 
@@ -80,7 +80,48 @@
 
   
 
-  Architectural 
Overview
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  
+
+  Core Modules 
within a Drillbit
+  
+  
+  
+
+  
+
+  Architectural 
Highlights
   
   
   
@@ -123,226 +164,15 @@
   
 
   
-Core 
Modules within a Drillbit
-  
-
-  
-Architectural 
Highlights
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
 
   
 
   
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-
-  
-
-  
-
-  Apache Drill 
Tutorial
-  
-  
-  
-
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
-  
-
+Flexibility
   
 
   
-
+Performance
   
 
   
@@ -534,28 +364,16 @@
   
 
   
-Installing the Apache Drill 
Sandbox
-  
 
   
-Getting 
to Know the Drill Sandbox
-  
 
   
-Lession 1: Learn about the 
Data Set
-  
 
   
-Lession 2: Run Queri

svn commit: r1662344 [2/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Modified: drill/site/trunk/content/drill/docs/apache-drill-in-10-minutes/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/apache-drill-in-10-minutes/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/apache-drill-in-10-minutes/index.html (original)
+++ drill/site/trunk/content/drill/docs/apache-drill-in-10-minutes/index.html Thu Feb 26 01:16:43 2015
@@ -85,13 +85,13 @@
 More Information
 
 
-Objective
+Objective
 
 Use Apache Drill to query sample data in 10 minutes. For simplicity, you’ll
 run Drill in embedded mode rather than distributed mode to try out Drill
 without having to perform any setup tasks.
 
-A Few Bits About Apache Drill
+A Few Bits About Apache Drill
 
 Drill is a clustered, powerful MPP (Massively Parallel Processing) query
 engine for Hadoop that can process petabytes of data, fast. Drill is useful
@@ -100,7 +100,7 @@ capable of querying nested data in forma
 performing dynamic schema discovery. Drill does not require a centralized
 metadata repository.
 
-_Dynamic schema discovery 
_
+Dynamic schema 
discovery
 
 Drill does not require schema or type specification for data in order to 
start
 the query execution process. Drill starts data processing in record-batches
@@ -144,7 +144,7 @@ extend the layer to a broader array of u
 classpath scanning and plugin concept to add additional storage plugins,
 functions, and operators with minimal configuration.
 
-Process Overview
+Process Overview
 
 Download the Apache Drill archive and extract the contents to a directory on
 your machine. The Apache Drill archive contains sample JSON and Parquet files
@@ -159,19 +159,19 @@ commands. SQLLine is used as the shell f
 
 You must have the following software installed on your machine to run 
Drill:
 
-SoftwareDescriptionhttp://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html"; class="external-link" rel="nofollow">Oracle JDK version 7A set of programming tools for developing Java applications.
+SoftwareDescriptionhttp://www.oracle.com/technetwork/java/javase/downloads/jdk7-downloads-1880260.html"; class="external-link" rel="nofollow">Oracle JDK version 7A set of programming tools for developing Java applications.
 
 Prerequisite Validation
 
 Run the following command to verify that the system meets the software
 prerequisite:
-Command Example Outputjava –versionjava version "1.7.0_65"Java(TM) SE Runtime Environment (build 1.7.0_65-b19)Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)
+Command Example Outputjava –versionjava version "1.7.0_65"Java(TM) SE Runtime Environment (build 1.7.0_65-b19)Java HotSpot(TM) 64-Bit Server VM (build 24.65-b04, mixed mode)
 
-Install Drill
+Install Drill
 
 You can install Drill on a machine running Linux, Mac OS X, or Windows.  

 
-Installing Drill on Linux
+Installing Drill on Linux
 
 Complete the following steps to install Drill:
 
@@ -182,7 +182,7 @@ prerequisite:
 Issue the following command to create a new directory to which you can extract the contents of the Drill tar.gz file:
 sudo mkdir -p /opt/drill
 
-Navigate to the directory where you downloaded the Drill tar.gz file.  
+Navigate to the directory where you downloaded the Drill tar.gz file.
 Issue the following command to extract the contents of the Drill tar.gz file:
 sudo tar -xvzf apache-drill-.tar.gz -C /opt/drill
 
@@ -191,9 +191,9 @@ prerequisite:
 
 
 
-At this point, you can https://cwiki.apache.org/confluence/displ%0Aay/DRILL/Apache+Drill+in+10+Minutes#ApacheDrillin10Minutes-StartDrill";>start Drill.
+At this point, you can start Drill.
 
-Installing Drill on Mac OS X
+Installing Drill on Mac OS X
 
 Complete the following steps to install Drill:
 
@@ -208,9 +208,8 @@ $ cd drill
 $ pwd
 /Users/max/drill
 
-Click the following link to download the latest, stable version of Apache Drill:
-
-http://www.apache.org/dyn/closer.cgi/drill/drill-0.7.0/apache-drill-0.7.0.tar.gz";>http://www.apache.org/dyn/closer.cgi/drill/drill-0.7.0/apache-drill-0.7.0.tar.gz
+Click the following link to download the latest, stable version of Apache Drill:
+  http://www.apache.org/dyn/closer.cgi/drill/drill-0.7.0/apache-drill-0.7.0.tar.gz";>http://www.apache.org/dyn/closer.cgi/drill/drill-0.7.0/apache-drill-0.7.0.tar.gz
 Open the downloaded TAR file with the Mac Archive utility or a similar tool for unzipping files.
 Move the resulting apache-drill- folder into the drill directory that you created.
 Issue the following command to navigate to the apache-drill- directory:
@@ -218,9 +217,9 @@ $ pwd
 
 
 
-At this point, you can https://cwiki.apache.org/confluence/displ%0Aay/DRILL/Apache+Drill+in+10+Minutes#ApacheDrillin10Minutes-StartDrill";>start Drill.
+At this point, you can start Drill.
 
-Installing Drill on Windows
+Installing Drill on Windows
 
 You can install 

svn commit: r1662344 [7/8] - in /drill/site/trunk/content/drill: ./ blog/2014/12/11/apache-drill-qa-panelist-spotlight/ docs/ docs/2014-q1-drill-report/ docs/advanced-properties/ docs/analyzing-yelp-j

2015-02-25 Thread adi
Modified: drill/site/trunk/content/drill/docs/release-notes/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/release-notes/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/release-notes/index.html (original)
+++ drill/site/trunk/content/drill/docs/release-notes/index.html Thu Feb 26 01:16:43 2015
@@ -80,7 +80,7 @@ Drill has been tested against MapR, Clou
 distributions. There are associated build profiles and JIRAs that can help you
 run Drill against your preferred distribution
 
-Apache Drill 0.7.0 Key Features
+Apache Drill 0.7.0 Key Features
 
 
 No more dependency on UDP/Multicast - Making it possible for Drill to 
work well in the following scenarios:
@@ -104,7 +104,7 @@ run Drill against your preferred distrib
 Stability improvements in ODBC and JDBC drivers
 
 
-Apache Drill 0.7.0 Key Notes and Limitations
+Apache Drill 0.7.0 Key Notes and Limitations
 
 
 The current release supports in-memory and beyond-memory execution. 
However, you must disable memory-intensive hash aggregate and hash join 
operations to leverage this functionality.
@@ -123,18 +123,18 @@ against Apache Hadoop. Drill has been te
 Hortonworks Hadoop distributions. There are associated build profiles and
 JIRAs that can help you run Drill against your preferred distribution.
 
-Apache Drill 0.6.0 Key Features
+Apache Drill 0.6.0 Key Features
 
 This release is primarily a bug fix release, with https://issues.apache.org/jira/secure/ReleaseNote.jspa?projectId=12313820&vers%0Aion=12327472";>more than 30 JIRAs closed, but there are some notable features:
 
 
-Direct ANSI SQL access to MongoDB, using the latest MongoDB Plugin for Apache Drill
+Direct ANSI SQL access to MongoDB, using the latest MongoDB Plugin for Apache Drill
 Filesystem query performance improvements with partition pruning
 Ability to use the file system as a persistent store for query profiles 
and diagnostic information
 Window function support (alpha)
 
 
-Apache Drill 0.6.0 Key Notes and Limitations
+Apache Drill 0.6.0 Key Notes and Limitations
 
 
 The current release supports in-memory and beyond-memory execution. 
However, you must disable memory-intensive hash aggregate and hash join 
operations to leverage this functionality.
@@ -157,7 +157,7 @@ against Apache Hadoop. Drill has been te
 Hortonworks Hadoop distributions. There are associated build profiles and
 JIRAs that can help you run Drill against your preferred distribution.
 
-Apache Drill 0.5.0 Key Notes and Limitations
+Apache Drill 0.5.0 Key Notes and Limitations
 
 
 The current release supports in memory and beyond memory execution. 
However, you must disable memory-intensive hash aggregate and hash join 
operations to leverage this functionality.
@@ -191,7 +191,7 @@ MapR, Cloudera and Hortonworks Hadoop di
 build profiles or JIRAs that can help you run against your preferred
 distribution.
 
-Some Key Notes & Limitations
+Some Key Notes & Limitations
 
 
 The current release supports in memory and beyond memory execution. 
However, users must disable memory-intensive hash aggregate and hash join 
operations to leverage this functionality.
@@ -241,7 +241,7 @@ will be correct in a future milestone re
 Drill Alpha does not include, there are currently a couple of differences 
for how to write a query in In order to query against
 
 
-UDFs
+UDFs
 
 
 Drill currently supports simple and aggregate functions using scalar, 
repeated and

Modified: drill/site/trunk/content/drill/docs/repeated-count-function/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/repeated-count-function/index.html?rev=1662344&r1=1662343&r2=1662344&view=diff
==============================================================================
--- drill/site/trunk/content/drill/docs/repeated-count-function/index.html (original)
+++ drill/site/trunk/content/drill/docs/repeated-count-function/index.html Thu Feb 26 01:16:43 2015
@@ -94,7 +94,7 @@ the count to be grouped by other columns
 this example).
 
 For another example of this function, see the following lesson in the Apache
-Drill Tutorial for Hadoop: Lesson 3: Run Queries on Complex Data Types.
+Drill Tutorial for Hadoop: Lesson 3: Run Queries on Complex Data Types.
 
 
 

Added: drill/site/trunk/content/drill/docs/reserved-keywords/index.html
URL: http://svn.apache.org/viewvc/drill/site/trunk/content/drill/docs/reserved-keywords/index.html?rev=1662344&view=auto
==============================================================================
--- drill/site/trunk/content/drill/docs/reserved-keywords/index.html (added)
+++ drill/site/trunk/content/drill/docs/reserved-keywords/index.html Thu Feb 26 01:16:43 2015
@@ -0,0 +1,102 @@
+
+
+
+
+
+
+
+
+Reserved Keywords - Apache Drill
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+  
+  
+Documentation
+
+  Overview
+ 

[03/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/query/005-query-info-skema.md
--
diff --git a/_docs/query/005-query-info-skema.md b/_docs/query/005-query-info-skema.md
new file mode 100644
index 000..1ad0008
--- /dev/null
+++ b/_docs/query/005-query-info-skema.md
@@ -0,0 +1,109 @@
+---
+title: "Querying the INFORMATION SCHEMA"
+parent: "Query Data"
+---
+When you are using Drill to connect to multiple data sources, you need a
+simple mechanism to discover what each data source contains. The information
+schema is an ANSI standard set of metadata tables that you can query to return
+information about all of your Drill data sources (or schemas). Data sources
+may be databases or file systems; they are all known as "schemas" in this
+context. You can query the following INFORMATION_SCHEMA tables:
+
+  * SCHEMATA
+  * CATALOGS
+  * TABLES
+  * COLUMNS 
+  * VIEWS
+
+## SCHEMATA
+
+The SCHEMATA table contains the CATALOG_NAME and SCHEMA_NAME columns. To allow
+maximum flexibility inside BI tools, the only catalog that Drill supports is
+`DRILL`.
+
+0: jdbc:drill:zk=local> select CATALOG_NAME, SCHEMA_NAME as all_my_data_sources from INFORMATION_SCHEMA.SCHEMATA order by SCHEMA_NAME;
++--------------+---------------------+
+| CATALOG_NAME | all_my_data_sources |
++--------------+---------------------+
+| DRILL        | INFORMATION_SCHEMA  |
+| DRILL        | cp.default          |
+| DRILL        | dfs.default         |
+| DRILL        | dfs.root            |
+| DRILL        | dfs.tmp             |
+| DRILL        | HiveTest.SalesDB    |
+| DRILL        | maprfs.logs         |
+| DRILL        | sys                 |
++--------------+---------------------+
+
+The INFORMATION_SCHEMA name and associated keywords are case-sensitive. You
+can also return a list of schemas by running the SHOW DATABASES command:
+
+0: jdbc:drill:zk=local> show databases;
++-------------+
+| SCHEMA_NAME |
++-------------+
+| dfs.default |
+| dfs.root    |
+| dfs.tmp     |
+...
+
+## CATALOGS
+
+The CATALOGS table returns only one row, with the hardcoded DRILL catalog name
+and description.
+
+## TABLES
+
+The TABLES table returns the table name and type for each table or view in
+your databases. (Type means TABLE or VIEW.) Note that Drill does not return
+files available for querying in file-based data sources. Instead, use SHOW
+FILES to explore these data sources.
+
+## COLUMNS
+
+The COLUMNS table returns the column name and other metadata (such as the data
+type) for each column in each table or view.
+
+## VIEWS
+
+The VIEWS table returns the name and definition for each view in your
+databases. Note that file schemas are the canonical repository for views in
+Drill. Depending on how you create a view, the view may only be displayed in Drill
+after it has been used.
+
+## Useful Queries
+
+Run an ``INFORMATION_SCHEMA.`TABLES` `` query to view all of the tables and views
+within a database. TABLES is a reserved word in Drill and requires back ticks
+(`).
+
+For example, the following query identifies all of the tables and views that
+Drill can access:
+
+SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE
+FROM INFORMATION_SCHEMA.`TABLES`
+ORDER BY TABLE_NAME DESC;
+
+TABLE_SCHEMA          TABLE_NAME             TABLE_TYPE
+
+HiveTest.CustomersDB  Customers              TABLE
+HiveTest.SalesDB      Orders                 TABLE
+HiveTest.SalesDB      OrderLines             TABLE
+HiveTest.SalesDB      USOrders               VIEW
+dfs.default           CustomerSocialProfile  VIEW
+
+
+**Note:** Currently, Drill only supports querying Drill views; Hive views are not yet supported.
+
+You can run a similar query to identify columns in tables and the data types
+of those columns:
+
+SELECT COLUMN_NAME, DATA_TYPE 
+FROM INFORMATION_SCHEMA.COLUMNS 
+WHERE TABLE_NAME = 'Orders' AND TABLE_SCHEMA = 'HiveTest.SalesDB' AND COLUMN_NAME LIKE '%Total';
++-------------+------------+
+| COLUMN_NAME | DATA_TYPE  |
++-------------+------------+
+| OrderTotal  | Decimal    |
++-------------+------------+
+
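
The SCHEMATA query shown above can also be issued programmatically; a minimal JDBC sketch, assuming a local embedded Drillbit reachable at jdbc:drill:zk=local and the Drill JDBC driver on the classpath:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.ResultSet;
    import java.sql.SQLException;
    import java.sql.Statement;

    // Lists every schema Drill knows about, mirroring the sqlline session above.
    public class ListDrillSchemas {
      public static void main(String[] args) throws SQLException {
        try (Connection conn = DriverManager.getConnection("jdbc:drill:zk=local");
             Statement stmt = conn.createStatement();
             ResultSet rs = stmt.executeQuery(
                 "SELECT CATALOG_NAME, SCHEMA_NAME FROM INFORMATION_SCHEMA.SCHEMATA "
                 + "ORDER BY SCHEMA_NAME")) {
          while (rs.next()) {
            System.out.println(rs.getString(1) + " | " + rs.getString(2));
          }
        }
      }
    }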

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/query/006-query-sys-tbl.md
--
diff --git a/_docs/query/006-query-sys-tbl.md b/_docs/query/006-query-sys-tbl.md
new file mode 100644
index 000..9b853ec
--- /dev/null
+++ b/_docs/query/006-query-sys-tbl.md
@@ -0,0 +1,159 @@
+---
+title: "Querying System Tables"
+parent: "Query Data"
+---
+Drill has a sys database that contains system tables. You can query the system
+tables for in

[02/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/sql-ref/cmd-summary/003-select.md
--
diff --git a/_docs/sql-ref/cmd-summary/003-select.md b/_docs/sql-ref/cmd-summary/003-select.md
new file mode 100644
index 000..4a4
--- /dev/null
+++ b/_docs/sql-ref/cmd-summary/003-select.md
@@ -0,0 +1,85 @@
+---
+title: "SELECT Statements"
+parent: "SQL Commands Summary"
+---
+Drill supports the following ANSI standard clauses in the SELECT statement:
+
+  * WITH clause
+  * SELECT list
+  * FROM clause
+  * WHERE clause
+  * GROUP BY clause
+  * HAVING clause
+  * ORDER BY clause (with an optional LIMIT clause)
+
+You can use the same SELECT syntax in the following commands:
+
+  * CREATE TABLE AS (CTAS)
+  * CREATE VIEW
+
+INSERT INTO SELECT is not yet supported.
+
+## Column Aliases
+
+You can use named column aliases in the SELECT list to provide meaningful
+names for regular columns and computed columns, such as the results of
+aggregate functions. See the section on running queries for examples.
+
+You cannot reference column aliases in the following clauses:
+
+  * WHERE
+  * GROUP BY
+  * HAVING
+
+Because Drill works with schema-less data sources, you cannot use positional
+aliases (1, 2, etc.) to refer to SELECT list columns, except in the ORDER BY
+clause.
+
+## UNION ALL Set Operator
+
+Drill supports the UNION ALL set operator to combine two result sets. The
+distinct UNION operator is not yet supported.
+
+The EXCEPT, EXCEPT ALL, INTERSECT, and INTERSECT ALL operators are not yet
+supported.
+
+## Joins
+
+Drill supports ANSI standard joins in the FROM and WHERE clauses:
+
+  * Inner joins
+  * Left, full, and right outer joins
+
+The following types of join syntax are supported:
+
+Join type| Syntax  
+---|---  
+Join condition in WHERE clause|FROM table1, table 2 WHERE 
table1.col1=table2.col1  
+USING join in FROM clause|FROM table1 JOIN table2 USING(col1, ...)  
+ON join in FROM clause|FROM table1 JOIN table2 ON table1.col1=table2.col1  
+NATURAL JOIN in FROM clause|FROM table 1 NATURAL JOIN table 2  
+
+Cross-joins are not yet supported. You must specify a join condition when more
+than one table is listed in the FROM clause.
+
+Non-equijoins are supported if the join also contains an equality condition on
+the same two tables as part of a conjunction:
+
+table1.col1 = table2.col1 AND table1.c2 < table2.c2
+
+This restriction applies to both inner and outer joins.
+
+## Subqueries
+
+You can use the following subquery operators in Drill queries. These operators
+all return Boolean results.
+
+  * ALL
+  * ANY
+  * EXISTS
+  * IN
+  * SOME
+
+In general, correlated subqueries are supported. EXISTS and NOT EXISTS
+subqueries that do not contain a correlation join are not yet supported.
+
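
To make the join restrictions above concrete, a hypothetical JDBC sketch; the connection URL, table names, and column names are illustrative only:

    import java.sql.Connection;
    import java.sql.DriverManager;
    import java.sql.SQLException;
    import java.sql.Statement;

    // Contrasts a supported non-equijoin (inequality conjoined with an
    // equality on the same two tables) with an unsupported pure inequality join.
    public class JoinRestrictionDemo {
      public static void main(String[] args) throws Exception {
        try (Connection conn = DriverManager.getConnection("jdbc:drill:zk=local");
             Statement stmt = conn.createStatement()) {
          // Supported: the inequality rides along with an equality conjunct.
          stmt.executeQuery(
              "SELECT * FROM t1 INNER JOIN t2 "
              + "ON t1.col1 = t2.col1 AND t1.c2 < t2.c2");

          try {
            // Unsupported: a pure inequality join is rejected at planning time
            // (surfaced via UnsupportedRelOperatorException under DRILL-1325).
            stmt.executeQuery("SELECT * FROM t1 INNER JOIN t2 ON t1.c2 < t2.c2");
          } catch (SQLException e) {
            System.out.println("Rejected as expected: " + e.getMessage());
          }
        }
      }
    }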

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/sql-ref/cmd-summary/004-show-files.md
--
diff --git a/_docs/sql-ref/cmd-summary/004-show-files.md b/_docs/sql-ref/cmd-summary/004-show-files.md
new file mode 100644
index 000..1fcf395
--- /dev/null
+++ b/_docs/sql-ref/cmd-summary/004-show-files.md
@@ -0,0 +1,65 @@
+---
+title: "SHOW FILES Command"
+parent: "SQL Commands Summary"
+---
+The SHOW FILES command provides a quick report of the file systems that are
+visible to Drill for query purposes. This command is unique to Apache Drill.
+
+## Syntax
+
+The SHOW FILES command supports the following syntax.
+
+SHOW FILES [ FROM filesystem.directory_name | IN filesystem.directory_name ];
+
+The FROM or IN clause is required if you do not specify a default file system
+first. You can do this with the USE command. FROM and IN are synonyms.
+
+The directory name is optional. (If the directory name is a Drill reserved
+word, you must use back ticks around the name.)
+
+The command returns standard Linux `stat` information for each file or
+directory, such as permissions, owner, and group values. This information is
+not specific to Drill.
+
+## Examples
+
+The following example returns information about directories and files in the
+local (`dfs`) file system.
+
+   0: jdbc:drill:> use dfs;
+
+   +++
+   | ok |  summary   |
+   +++
+   | true   | Default schema changed to 'dfs' |
+   +++
+   1 row selected (0.318 seconds)
+
+   0: jdbc:drill:> show files;
+   
++-+++++-++--+
+   |name| isDirectory |   isFile   |   length   |   owner|   
group| permissions | accessTime | modificationTime |
+   
++-+++++-++--+
+   | user   | true| false  | 1

[06/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/sql-ref/nested/001-flatten.md
--
diff --git a/_docs/drill-docs/sql-ref/nested/001-flatten.md b/_docs/drill-docs/sql-ref/nested/001-flatten.md
deleted file mode 100644
index 124db91..000
--- a/_docs/drill-docs/sql-ref/nested/001-flatten.md
+++ /dev/null
@@ -1,89 +0,0 @@

-title: "FLATTEN Function"
-parent: "Nested Data Functions"

-The FLATTEN function is useful for flexible exploration of repeated data.
-FLATTEN separates the elements in a repeated field into individual records. To
-maintain the association between each flattened value and the other fields in
-the record, all of the other columns are copied into each new record. A very
-simple example would turn this data (one record):
-
-{
-  "x" : 5,
-  "y" : "a string",
-  "z" : [ 1,2,3]
-}
-
-into three distinct records:
-
-select flatten(z) from table;
-| x   | y  | z |
-+-++---+
-| 5   | "a string" | 1 |
-| 5   | "a string" | 2 |
-| 5   | "a string" | 3 |
-
-The function takes a single argument, which must be an array (the `z` column
-in this example).
-
-  
-
-For a more interesting example, consider the JSON data in the publicly
-available [Yelp](https://www.yelp.com/dataset_challenge/dataset) data set. The
-first query below returns three columns from the
-`yelp_academic_dataset_business.json` file: `name`, `hours`, and `categories`.
-The query is restricted to distinct rows where the name is `zpizza`. The
-query returns only one row that meets those criteria; however, note that this
-row contains an array of four categories:
-
-0: jdbc:drill:zk=local> select distinct name, hours, categories 
-from dfs.yelp.`yelp_academic_dataset_business.json` 
-where name ='zpizza';
-++++
-|name|   hours| categories |
-++++
-| zpizza | {"Tuesday":{"close":"22:00","open":"10:00"},"Friday":{"close":"23:00","open":"10:00"},"Monday":{"close":"22:00","open":"10:00"},"Wednesday":{"close":"22:00","open":"10:00"},"Thursday":{"close":"22:00","open":"10:00"},"Sunday":{"close":"22:00","open":"10:00"},"Saturday":{"close":"23:00","open":"10:00"}} | ["Gluten-Free","Pizza","Vegan","Restaurants"] |
-
-The FLATTEN function can operate on this single row and return multiple rows,
-one for each category:
-
-0: jdbc:drill:zk=local> select distinct name, flatten(categories) as categories 
-from dfs.yelp.`yelp_academic_dataset_business.json` 
-where name ='zpizza' order by 2;
-++-+
-|name| categories  |
-++-+
-| zpizza | Gluten-Free |
-| zpizza | Pizza   |
-| zpizza | Restaurants |
-| zpizza | Vegan   |
-++-+
-4 rows selected (2.797 seconds)
-
-Having used the FLATTEN function to break down arrays into distinct rows, you
-can run queries that do deeper analysis on the flattened result set. For
-example, you can use FLATTEN in a subquery, then apply WHERE clause
-constraints or aggregate functions to the results in the outer query.
-
-The following query uses the same data file as the previous query to flatten
-the categories array, then run a COUNT function on the flattened result:
-
-select celltbl.catl, count(celltbl.catl) catcount 
-from (select flatten(categories) catl 
-from dfs.yelp.`yelp_academic_dataset_business.json`) celltbl 
-group by celltbl.catl 
-order by count(celltbl.catl) desc limit 5;
- 
-+---++
-|catl   |  catcount  |
-+---++
-| Restaurants   | 14303  |
-| Shopping  | 6428   |
-| Food  | 5209   |
-| Beauty & Spas | 3421   |
-| Nightlife | 2870   |
-+---|+
-
-A common use case for FLATTEN is its use in conjunction with the
-[KVGEN](/confluence/display/DRILL/KVGEN+Function) function.
-

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/sql-ref/nested/002-kvgen.md
--
diff --git a/_docs/drill-docs/sql-ref/nested/002-kvgen.md b/_docs/drill-docs/sql-ref/nested/002-kvgen.md
deleted file mode 100644
index a27a781..000
--- a/_docs/drill-docs/sql-ref/nested/002-kvgen.md
+++ /dev/null
@@ -1,150 +0,0 @@

-title: "KVGEN Function"
-parent: "Nested Data Functions"

-KVGEN stands for _key-value generation_. This function is useful when complex
-data files contain arbitrary maps that consist of relatively "unknown" column
-names. Instead of having to specify columns in the map to access the data, you
-can use KVGEN to ret

[11/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/design/005-value.md
--
diff --git a/_docs/design/005-value.md b/_docs/design/005-value.md
new file mode 100644
index 000..828376a
--- /dev/null
+++ b/_docs/design/005-value.md
@@ -0,0 +1,163 @@
+---
+title: "Value Vectors"
+parent: "Design Docs"
+---
+This document defines the data structures required for passing sequences of
+columnar data between [Operators](https://docs.google.com/a/maprtech.com/document/d/1zaxkcrK9mYyfpGwX1kAV80z0PCi8abefL45zOzb97dI/edit#bookmark=id.iip15ful18mm).
+
+## Goals
+
+### Support Operators Written in Multiple Languages
+
+ValueVectors should support operators written in C/C++/Assembly. To support
+this, the underlying ByteBuffer will not require modification when passed
+through the JNI interface. The ValueVector will be considered immutable once
+constructed. Endianness has not yet been considered.
+
+### Access
+
+Reading a random element from a ValueVector must be a constant time operation.
+To accommodate this, elements are identified by their offset from the start of
+the buffer. Repeated, nullable and variable width ValueVectors utilize an
+additional fixed width value vector to index each element. Write access is not
+supported once the ValueVector has been constructed by the RecordBatch.
+
+### Efficient Subsets of Value Vectors
+
+When an operator returns a subset of values from a ValueVector, it should
+reuse the original ValueVector. To accomplish this, a level of indirection is
+introduced to skip over certain values in the vector. This level of
+indirection is a sequence of offsets which reference an offset in the original
+ValueVector and the count of subsequent values which are to be included in the
+subset. For example, the pair (offset 2, count 3) selects the third through
+fifth values of the original vector.
+
+### Pooled Allocation
+
+ValueVectors utilize one or more buffers under the covers. These buffers will
+be drawn from a pool. Value vectors are themselves created and destroyed as a
+schema changes during the course of record iteration.
+
+### Homogenous Value Types
+
+Each value in a Value Vector is of the same type. The [Record Batch](https://docs.google.com/a/maprtech.com/document/d/1zaxkcrK9mYyfpGwX1kAV80z0PCi8abefL45zOzb97dI/edit#bookmark=kix.s2xuoqnr8obe) implementation is responsible for
+creating a new Value Vector any time there is a change in schema.
+
+## Definitions
+
+Data Types
+
+The canonical source for value type definitions is the [Drill
+Datatypes](http://bit.ly/15JO9bC) document. The individual types are listed
+under the ‘Basic Data Types’ tab, while the value vector types can be found
+under the ‘Value Vectors’ tab.
+
+Operators
+
+An operator is responsible for transforming a stream of fields. It operates on
+Record Batches or constant values.
+
+Record Batch
+
+A set of field values for some range of records. The batch may be composed of
+Value Vectors, in which case each batch consists of exactly one schema.
+
+Value Vector
+
+The value vector is composed of one or more contiguous buffers: one which
+stores a sequence of values, and zero or more which store any metadata
+associated with the ValueVector.
+
+## Data Structure
+
+A ValueVector stores values in a ByteBuf, which is a contiguous region of
+memory. Additional levels of indirection are used to support variable value
+widths, nullable values, repeated values and selection vectors. These levels
+of indirection are primarily lookup tables which consist of one or more fixed
+width ValueVectors which may be combined (e.g. for nullable, variable width
+values). A fixed width ValueVector of non-nullable, non-repeatable values does
+not require an indirect lookup; elements can be accessed directly by
+multiplying position by stride.
+
+### Fixed Width Values
+
+Fixed width ValueVectors simply contain a packed sequence of values. Random
+access is supported by accessing element n at ByteBuf[0] + Index * Stride,
+where Index is 0-based. For example, with a 4-byte stride, element 3 of an
+INT4 vector begins 3 * 4 = 12 bytes into the buffer. The following
+illustrates the underlying buffer of INT4 values [1 .. 6]:
+
+![drill query flow]({{ site.baseurl }}/docs/img/value1.png)
+
+### Nullable Values
+
+Nullable values are represented by a vector of bit values. Each bit in the
+vector corresponds to an element in the ValueVector. If the bit is not set,
+the value is NULL. Otherwise the value is retrieved from the underlying
+buffer. The following illustrates a NullableValueVector of INT4 values 2, 3
+and 6:
+
+![drill query flow]({{ site.baseurl }}/docs/img/value2.png)
+  
+### Repeated Values
+
+A repeated ValueVector is used for elements which can contain multiple values
+(e.g. a JSON array). A table of offset and count pairs is used to represent
+each repeated element in the ValueVector. A count of zero means the element
+has no values (note the offset field is unused in this case). The following
+illustrates three fields; one with two values, one with no values, and one
+with a single value:
+
+![drill query flow]({{ site.baseurl }}/docs/img/value3

[05/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/img/ngram_plugin2.png
--
diff --git a/_docs/img/ngram_plugin2.png b/_docs/img/ngram_plugin2.png
new file mode 100644
index 000..60d432d
Binary files /dev/null and b/_docs/img/ngram_plugin2.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/img/settings.png
--
diff --git a/_docs/img/settings.png b/_docs/img/settings.png
new file mode 100644
index 000..dcff0d9
Binary files /dev/null and b/_docs/img/settings.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/img/student_hive.png
--
diff --git a/_docs/img/student_hive.png b/_docs/img/student_hive.png
new file mode 100644
index 000..7e22b88
Binary files /dev/null and b/_docs/img/student_hive.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/install/001-drill-in-10.md
--
diff --git a/_docs/install/001-drill-in-10.md b/_docs/install/001-drill-in-10.md
new file mode 100644
index 000..13d2410
--- /dev/null
+++ b/_docs/install/001-drill-in-10.md
@@ -0,0 +1,365 @@
+---
+title: "Apache Drill in 10 Minutes"
+parent: "Install Drill"
+---
+* Objective
+* A Few Bits About Apache Drill
+* Process Overview
+* Install Drill
+  * Installing Drill on Linux
+  * Installing Drill on Mac OS X
+  * Installing Drill on Windows 
+* Start Drill 
+* Query Sample Data 
+* Summary 
+* Next Steps
+* More Information
+
+## Objective
+
+Use Apache Drill to query sample data in 10 minutes. For simplicity, you’ll
+run Drill in _embedded_ mode rather than _distributed_ mode to try out Drill
+without having to perform any setup tasks.
+
+## A Few Bits About Apache Drill
+
+Drill is a clustered, powerful MPP (Massively Parallel Processing) query
+engine for Hadoop that can process petabytes of data, fast. Drill is useful
+for short, interactive ad-hoc queries on large-scale data sets. Drill is
+capable of querying nested data in formats like JSON and Parquet and
+performing dynamic schema discovery. Drill does not require a centralized
+metadata repository.
+
+### **_Dynamic schema discovery_**
+
+Drill does not require schema or type specification for data in order to start
+the query execution process. Drill starts data processing in record-batches
+and discovers the schema during processing. Self-describing data formats such
+as Parquet, JSON, AVRO, and NoSQL databases have schema specified as part of
+the data itself, which Drill leverages dynamically at query time. Because
+schema can change over the course of a Drill query, all Drill operators are
+designed to reconfigure themselves when schemas change.
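+
+For example, a query can run with no prior DDL or schema registration; a
+minimal sketch using the `employee.json` sample file on Drill's classpath:
+
+0: jdbc:drill:zk=local> select full_name, position_id from cp.`employee.json` limit 3;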
+
+### **_Flexible data model_**
+
+Drill allows access to nested data attributes, just like SQL columns, and
+provides intuitive extensions to easily operate on them. From an architectural
+point of view, Drill provides a flexible hierarchical columnar data model that
+can represent complex, highly dynamic and evolving data models. Drill allows
+for efficient processing of these models without the need to flatten or
+materialize them at design time or at execution time. Relational data in Drill
+is treated as a special or simplified case of complex/multi-structured data.
+
+### **_De-centralized metadata_**
+
+Drill does not have a centralized metadata requirement. You do not need to
+create and manage tables and views in a metadata repository, or rely on a
+database administrator group for such a function. Drill metadata is derived
+from the storage plugins that correspond to data sources. Storage plugins
+provide a spectrum of metadata ranging from full metadata (Hive) to partial
+metadata (HBase) to no central metadata (files). De-centralized metadata
+means that Drill is NOT tied to a single Hive repository. You can query
+multiple Hive repositories at once and then combine the data with information
+from HBase tables or with a file in a distributed file system. You can also
+use SQL DDL syntax to create metadata within Drill, which gets organized just
+like a traditional database. Drill metadata is accessible through the ANSI
+standard INFORMATION_SCHEMA database.
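+
+For example, the following sketch (assuming a running Drillbit with the
+default storage plugin names) lists the tables and views visible to Drill
+across all configured schemas:
+
+0: jdbc:drill:zk=local> select TABLE_SCHEMA, TABLE_NAME from INFORMATION_SCHEMA.`TABLES`;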
+
+### **_Extensibility_**
+
+Drill provides an extensible architecture at all layers, including the storage
+plugin, query, query optimization/execution, and client API layers. You can
+customize any layer for the specific needs of an organization or you can
+extend the layer to a broader array of use cases. Drill provides a built-in
+classpath scanning and plugin concept to add additional storage plugins,
+functions, and operators with minimal configuration.
+
+## Process Overview
+
+Download the Apache Drill archive and extract the contents to a directory on
+your machine. The Apache Drill archiv

[09/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/datasets/001-aol.md
--
diff --git a/_docs/drill-docs/datasets/001-aol.md 
b/_docs/drill-docs/datasets/001-aol.md
deleted file mode 100644
index 472f52f..000
--- a/_docs/drill-docs/datasets/001-aol.md
+++ /dev/null
@@ -1,47 +0,0 @@

-title: "AOL Search"
-parent: "Sample Datasets"

-## Quick Stats
-
-The [AOL Search dataset](http://en.wikipedia.org/wiki/AOL_search_data_leak) is
-a collection of real query log data from real users.
-
-## The Data Source
-
-The dataset consists of 20M Web queries from 650k users over a period of three
-months, 440MB in total and available [for
-download](http://zola.di.unipi.it/smalltext/datasets.html). The format used in
-the dataset is:
-
-AnonID, Query, QueryTime, ItemRank, ClickURL
-
-... with:
-
-  * AnonID, an anonymous user ID number.
-  * Query, the query issued by the user, case shifted with most punctuation removed.
-  * QueryTime, the time at which the query was submitted for search.
-  * ItemRank, if the user clicked on a search result, the rank of the item on which they clicked is listed.
-  * ClickURL, if the user clicked on a search result, the domain portion of the URL in the clicked result is listed.
-
-Each line in the data represents one of two types of events:
-
-  * A query that was NOT followed by the user clicking on a result item.
-  * A click through on an item in the result list returned from a query.
-
-In the first case (query only), there is data in only the first three columns;
-in the second case (click through), there is data in all five columns. For
-click through events, the query that preceded the click through is included.
-Note that if a user clicked on more than one result in the list returned from
-a single query, there will be TWO lines in the data to represent the two
-events.
-
-## The Queries
-
-Interesting queries include, for example:
-
-  * Users querying for topic X
-  * Users that click on the first (second, third) ranked item
-  * TOP 10 domains searched
-  * TOP 10 domains clicked at
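-
-A sketch of the last query above, assuming the raw file has been exposed
-through a dfs workspace as a table named `aol` with the five columns listed
-earlier (the workspace and column names are illustrative, not part of the
-raw dump):
-
-select ClickURL, count(*) as clicks
-from dfs.samples.`aol`
-where ClickURL is not null
-group by ClickURL
-order by clicks desc limit 10;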
-

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/datasets/002-enron.md
--
diff --git a/_docs/drill-docs/datasets/002-enron.md 
b/_docs/drill-docs/datasets/002-enron.md
deleted file mode 100644
index 2ddbef6..000
--- a/_docs/drill-docs/datasets/002-enron.md
+++ /dev/null
@@ -1,21 +0,0 @@

-title: "Enron Emails"
-parent: "Sample Datasets"

-## Quick Stats
-
-The [Enron Email dataset](http://www.cs.cmu.edu/~enron/) contains data from
-about 150 users, mostly senior management of Enron.
-
-## The Data Source
-
-Totalling some 500,000 messages, the [raw
-data](http://www.cs.cmu.edu/~enron/enron_mail_20110402.tgz) (2009 version of
-the dataset; ~423MB) is available for download as well as a [MySQL
-dump](ftp://ftp.isi.edu/sims/philpot/data/enron-mysqldump.sql.gz) (~177MB).
-
-## The Queries
-
-Interesting queries include, for example:
-
-  * Via [Query Dataset for Email 
Search](https://dbappserv.cis.upenn.edu/spell/)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/datasets/003-wikipedia.md
--
diff --git a/_docs/drill-docs/datasets/003-wikipedia.md 
b/_docs/drill-docs/datasets/003-wikipedia.md
deleted file mode 100644
index 99e6e24..000
--- a/_docs/drill-docs/datasets/003-wikipedia.md
+++ /dev/null
@@ -1,105 +0,0 @@

-title: "Wikipedia Edit History"
-parent: "Sample Datasets"

-## Quick Stats
-
-The Wikipedia Edit History is a public dump of the website made available by
-the Wikimedia Foundation. You can find details
-[here](http://en.wikipedia.org/wiki/Wikipedia:Database_download). The dumps
-are made available as SQL or XML dumps. You can find the entire schema drawn
-together in this great [diagram](http://upload.wikimedia.org/wikipedia/commons/thumb/4/42/MediaWiki_1.20_%2844edaa2%29_database_schema.svg/2193px-MediaWiki_1.20_%2844edaa2%29_database_schema.svg.png).
-
-## Approach
-
-The _main_ distribution files are:
-
-  * Current Pages: As of January 2013 this SQL dump was 9.0GB in its compressed format.
-  * Complete Archive: This is what we actually want, but at a size of multiple terabytes, it clearly exceeds the storage available at home.
-
-To have some real historic data, it is recommended to download a _Special
-Export_ using this
-[link](http://en.wikipedia.org/w/index.php?title=Special:Export). Using this
-tool you generate a category specific XML dump and configure various export
-options. There are some limits, like a maximum of 1000 revisions per export,
-but otherwise this should work out just fine.
-
-![](../../img/Overview.png)
-
-The diagram above shows the entities used in the query use cases.
-
-## Use Cases
-
-### Select Change Volume 

[12/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/arch/001-core-mod.md
--
diff --git a/_docs/arch/001-core-mod.md b/_docs/arch/001-core-mod.md
new file mode 100644
index 000..17fa18d
--- /dev/null
+++ b/_docs/arch/001-core-mod.md
@@ -0,0 +1,29 @@
+---
+title: "Core Modules within a Drillbit"
+parent: "Architectural Overview"
+---
+The following image represents components within each Drillbit:
+
+![drill query flow]({{ site.baseurl }}/docs/img/DrillbitModules.png)
+
+The following list describes the key components of a Drillbit:
+
+  * **RPC end point**: Drill exposes a low overhead protobuf-based RPC protocol to communicate with the clients. Additionally, C++ and Java API layers are available for client applications to interact with Drill. Clients can communicate with a specific Drillbit directly or go through a ZooKeeper quorum to discover the available Drillbits before submitting queries. It is recommended that clients always go through ZooKeeper to shield them from the intricacies of cluster management, such as the addition or removal of nodes. 
+
+  * **SQL parser**: Drill uses Optiq, the open source framework, to parse incoming queries. The output of the parser component is a language agnostic, computer-friendly logical plan that represents the query. 
+  * **Storage plugin interfaces**: Drill serves as a query layer on top of several data sources. Storage plugins in Drill represent the abstractions that Drill uses to interact with the data sources. Storage plugins provide Drill with the following information:
+    * Metadata available in the source
+    * Interfaces for Drill to read from and write to data sources
+    * Location of data and a set of optimization rules to help with efficient and faster execution of Drill queries on a specific data source 
+
+In the context of Hadoop, Drill provides storage plugins for files and
+HBase/M7. Drill also integrates with Hive as a storage plugin since Hive
+provides a metadata abstraction layer on top of files, HBase/M7, and provides
+libraries to read data and operate on these sources (Serdes and UDFs).
+
+When users query files and HBase/M7 with Drill, they can do it directly or go
+through Hive if they have metadata defined there. Drill integration with Hive
+is only for metadata. Drill does not invoke the Hive execution engine for any
+requests.
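+
+A hypothetical illustration of this metadata-only integration, with `orders`
+as a Hive table and `customers.json` as a raw file (both names assumed),
+combines the two sources in a single query that Drill itself executes:
+
+select o.order_id, c.name
+from hive.`orders` o
+join dfs.`/data/customers.json` c on o.cust_id = c.cust_id;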
+
+  * **Distributed cache**: Drill uses a distributed cache to manage metadata (not the data) and configuration information across various nodes. Sample metadata information that is stored in the cache includes query plan fragments, intermediate state of the query execution, and statistics. Drill uses Infinispan as its cache technology.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/arch/002-arch-hilite.md
--
diff --git a/_docs/arch/002-arch-hilite.md b/_docs/arch/002-arch-hilite.md
new file mode 100644
index 000..5ac51bc
--- /dev/null
+++ b/_docs/arch/002-arch-hilite.md
@@ -0,0 +1,10 @@
+---
+title: "Architectural Highlights"
+parent: "Architectural Overview"
+---
+The goal for Drill is to bring the **SQL Ecosystem** and **Performance** of
+the relational systems to **Hadoop scale** data **WITHOUT** compromising on
+the **Flexibility** of Hadoop/NoSQL systems. There are several core
+architectural elements in Apache Drill that make it a highly flexible and
+efficient query engine.
+

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/arch/arch-hilite/001-flexibility.md
--
diff --git a/_docs/arch/arch-hilite/001-flexibility.md 
b/_docs/arch/arch-hilite/001-flexibility.md
new file mode 100644
index 000..0b5c5e3
--- /dev/null
+++ b/_docs/arch/arch-hilite/001-flexibility.md
@@ -0,0 +1,78 @@
+---
+title: "Flexibility"
+parent: "Architectural Highlights"
+---
+The following features contribute to Drill's flexible architecture:
+
+**_Dynamic schema discovery_**
+
+Drill does not require schema or type specification for the data in order to
+start the query execution process. Instead, Drill starts processing the data
+in units called record-batches and discovers the schema on the fly during
+processing. Self-describing data formats such as Parquet, JSON, AVRO, and
+NoSQL databases have schema specified as part of the data itself, which Drill
+leverages dynamically at query time. Schema can change over the course of a
+Drill query, so all of the Drill operators are designed to reconfigure
+themselves when such schema changing events occur.
+
+**_Flexible data model_**
+
+Drill is purpose-built from the ground up for complex/multi-structured data
+commonly seen in Hadoop/NoSQL applications such as social/mobile, clickstream,
+logs, and sensor-equipped IoT. From a user point of view, Drill a

[13/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
DRILL-2315: Confluence conversion plus fixes


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d959a210
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d959a210
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d959a210

Branch: refs/heads/gh-pages-master
Commit: d959a210053f02b5069f0a0cb9f0d34131640ffb
Parents: 23f82db
Author: Kristine Hahn 
Authored: Thu Jan 15 19:42:12 2015 -0800
Committer: Bridget Bevens 
Committed: Wed Feb 25 16:22:24 2015 -0800

--
 .gitignore  |   1 +
 _docs/001-arch.md   |  49 +++
 _docs/001-drill-docs.md |   4 -
 _docs/002-tutorial.md   |  51 +++
 _docs/003-yelp.md   | 412 ++
 _docs/004-install.md|  13 +
 _docs/005-connect.md|  41 ++
 _docs/006-interfaces.md |  50 +++
 _docs/007-query.md  |  41 ++
 _docs/008-sql-ref.md|  14 +
 _docs/009-dev-custom-func.md|  37 ++
 _docs/010-manage.md |  14 +
 _docs/011-develop.md|   9 +
 _docs/012-rn.md | 191 +
 _docs/013-contribute.md |   9 +
 _docs/014-sample-ds.md  |  10 +
 _docs/015-design.md |  13 +
 _docs/016-progress.md   |   8 +
 _docs/017-archived-pages.md |   8 +
 _docs/018-bylaws.md | 170 
 _docs/arch/001-core-mod.md  |  29 ++
 _docs/arch/002-arch-hilite.md   |  10 +
 _docs/arch/arch-hilite/001-flexibility.md   |  78 
 _docs/arch/arch-hilite/002-performance.md   |  55 +++
 _docs/archive/001-how-to-demo.md| 309 ++
 _docs/archive/002-meet-drill.md |  41 ++
 _docs/connect/001-plugin-reg.md |  35 ++
 _docs/connect/002-workspaces.md |  74 
 _docs/connect/003-reg-fs.md |  64 +++
 _docs/connect/004-reg-hbase.md  |  32 ++
 _docs/connect/005-reg-hive.md   |  83 
 _docs/connect/006-default-frmt.md   |  60 +++
 _docs/connect/007-mongo-plugin.md   | 167 
 _docs/connect/008-mapr-db-plugin.md |  31 ++
 _docs/contribute/001-guidelines.md  | 229 ++
 _docs/contribute/002-ideas.md   | 158 +++
 _docs/datasets/001-aol.md   |  47 +++
 _docs/datasets/002-enron.md |  19 +
 _docs/datasets/003-wikipedia.md | 105 +
 _docs/design/001-plan.md|  25 ++
 _docs/design/002-rpc.md |  19 +
 _docs/design/003-query-stages.md|  42 ++
 _docs/design/004-research.md|  48 +++
 _docs/design/005-value.md   | 163 +++
 _docs/dev-custom-fcn/001-dev-simple.md  |  50 +++
 _docs/dev-custom-fcn/002-dev-aggregate.md   |  55 +++
 _docs/dev-custom-fcn/003-add-custom.md  |  26 ++
 _docs/dev-custom-fcn/004-use-custom.md  |  55 +++
 _docs/dev-custom-fcn/005-cust-interface.md  |   8 +
 _docs/develop/001-compile.md|  37 ++
 _docs/develop/002-setup.md  |   5 +
 _docs/develop/003-patch-tool.md | 160 +++
 _docs/drill-docs/001-arch.md|  58 ---
 _docs/drill-docs/002-tutorial.md|  58 ---
 _docs/drill-docs/003-yelp.md| 402 --
 _docs/drill-docs/004-install.md |  20 -
 _docs/drill-docs/005-connect.md |  49 ---
 _docs/drill-docs/006-query.md   |  57 ---
 _docs/drill-docs/006-sql-ref.md |  25 --
 _docs/drill-docs/007-dev-custom-func.md |  47 ---
 _docs/drill-docs/008-manage.md  |  23 -
 _docs/drill-docs/009-develop.md |  16 -
 _docs/drill-docs/010-rn.md  | 192 -
 _docs/drill-docs/011-contribute.md  |  11 -
 _docs/drill-docs/012-sample-ds.md   |  11 -
 _docs/drill-docs/013-design.md  |  14 -
 _docs/drill-docs/014-progress.md|   9 -
 _docs/drill-docs/015-archived-pages.md  |   9 -
 _docs/drill-docs/016-bylaws.md  | 171 
 _docs/drill-docs/arch/001-core-mod.md   |  30 --
 _docs/drill-docs/arch/002-arch-hilite.md|  15 -
 .../arch/arch-hilite/001-flexibility.md |  79 
 .../arch/arch-hilite/002-performance.md |  56 ---
 _docs/drill-docs/archive/001-how-to-demo.md | 

[07/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/query/query-fs/001-query-json.md
--
diff --git a/_docs/drill-docs/query/query-fs/001-query-json.md 
b/_docs/drill-docs/query/query-fs/001-query-json.md
deleted file mode 100644
index 048903b..000
--- a/_docs/drill-docs/query/query-fs/001-query-json.md
+++ /dev/null
@@ -1,41 +0,0 @@

-title: "Querying JSON Files"
-parent: "Querying a File System"

-Your Drill installation includes a sample JSON file located in Drill's
-classpath. The sample JSON file, `employee.json`, contains fictitious employee
-data. Use SQL syntax to query the sample `JSON` file.
-
-To view the data in the `employee.json` file, submit the following SQL query
-to Drill:
-
-``0: jdbc:drill:zk=local> SELECT * FROM cp.`employee.json`;``
-
-The query returns the following results:
-
-**Example of partial output**
-
-
-+-------------+--------------------+------------+-------------+-------------+-----------+
-| employee_id | full_name          | first_name | last_name   | position_id | position_ |
-+-------------+--------------------+------------+-------------+-------------+-----------+
-| 1101        | Steve Eurich       | Steve      | Eurich      | 16          | Store T   |
-| 1102        | Mary Pierson       | Mary       | Pierson     | 16          | Store T   |
-| 1103        | Leo Jones          | Leo        | Jones       | 16          | Store Tem |
-| 1104        | Nancy Beatty       | Nancy      | Beatty      | 16          | Store T   |
-| 1105        | Clara McNight      | Clara      | McNight     | 16          | Store     |
-| 1106        | Marcella Isaacs    | Marcella   | Isaacs      | 17          | Stor      |
-| 1107        | Charlotte Yonce    | Charlotte  | Yonce       | 17          | Stor      |
-| 1108        | Benjamin Foster    | Benjamin   | Foster      | 17          | Stor      |
-| 1109        | John Reed          | John       | Reed        | 17          | Store Per |
-| 1110        | Lynn Kwiatkowski   | Lynn       | Kwiatkowski | 17          | St        |
-| 1111        | Donald Vann        | Donald     | Vann        | 17          | Store Pe  |
-| 1112        | William Smith      | William    | Smith       | 17          | Store     |
-| 1113        | Amy Hensley        | Amy        | Hensley     | 17          | Store Pe  |
-| 1114        | Judy Owens         | Judy       | Owens       | 17          | Store Per |
-| 1115        | Frederick Castillo | Frederick  | Castillo    | 17          | S         |
-| 1116        | Phil Munoz         | Phil       | Munoz       | 17          | Store Per |
-| 1117        | Lori Lightfoot     | Lori       | Lightfoot   | 17          | Store     |
-...
-+-------------+--------------------+------------+-------------+-------------+-----------+
-1,155 rows selected (0.762 seconds)
-0: jdbc:drill:zk=local>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/query/query-fs/002-query-parquet.md
--
diff --git a/_docs/drill-docs/query/query-fs/002-query-parquet.md 
b/_docs/drill-docs/query/query-fs/002-query-parquet.md
deleted file mode 100644
index 9b4e874..000
--- a/_docs/drill-docs/query/query-fs/002-query-parquet.md
+++ /dev/null
@@ -1,99 +0,0 @@

-title: "Querying Parquet Files"
-parent: "Querying a File System"

-Your Drill installation includes a `sample-data` directory with Parquet files
-that you can query. Use SQL syntax to query the `region.parquet` and
-`nation.parquet` files in the `sample-data` directory.
-
-**Note:** Your Drill installation location may differ from the examples used here. The examples assume that Drill was installed in embedded mode on your machine following the [Apache Drill in 10 Minutes](https://cwiki.apache.org/confluence/display/DRILL/Apache+Drill+in+10+Minutes) tutorial. If you installed Drill in distributed mode, or your `sample-data` directory differs from the location used in the examples, make sure to change the `sample-data` directory to the correct location before you run the queries.
-
-## Region File
-
-If you followed the Apache Drill in 10 Minutes instructions to install Drill
-in embedded mode, the path to the parquet file varies between operating
-systems.
-
-To view the data in the `region.parquet` file, issue the query appropriate for
-your operating system:
-
-  * Linux  
-``SELECT * FROM dfs.`/opt/drill/apache-drill-0.4.0-incubating/sample-data/region.parquet`;``
-
-  * Mac OS X  
-``SELECT * FROM dfs.`/Users/max/drill/apache-drill-0.4.0-incubating/sample-data/region.parquet`;``
-
-  * Windows  
-``SELECT * FROM dfs.`C:\drill\apache-drill-0.4.0-incubating\sample-data\region.parquet`;``
-
-The query returns the following results:
-
-+------------+------------+
-|   EXPR$0   |   EXPR$1   |
-+------------+------------+
-| AFRICA     | lar deposits. blithely final packages cajole. regular waters ar |
-

[01/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
Repository: drill
Updated Branches:
  refs/heads/gh-pages-master 23f82db9f -> d959a2100


http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/tutorial/005-lesson3.md
--
diff --git a/_docs/tutorial/005-lesson3.md b/_docs/tutorial/005-lesson3.md
new file mode 100644
index 000..f6c7ae4
--- /dev/null
+++ b/_docs/tutorial/005-lesson3.md
@@ -0,0 +1,379 @@
+---
+title: "Lession 3: Run Queries on Complex Data Types"
+parent: "Apache Drill Tutorial"
+---
+## Goal
+
+This lesson focuses on queries that exercise functions and operators on self-
+describing data and complex data types. Drill offers intuitive SQL extensions
+to work with such data and offers high query performance with an architecture
+built from the ground up for complex data.
+
+## Queries in This Lesson
+
+Now that you have run ANSI SQL queries against different tables and files with
+relational data, you can try some examples including complex types.
+
+  * Access directories and subdirectories of files in a single SELECT statement.
+  * Demonstrate simple ways to access complex data in JSON files.
+  * Demonstrate the repeated_count function to aggregate values in an array, as sketched below.
+
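+As a quick preview of the last item, here is a minimal sketch that assumes a
+dfs.yelp workspace pointing at the Yelp business file used elsewhere in these
+docs; repeated_count returns the number of values in an array for each record:
+
+0: jdbc:drill:> select repeated_count(categories) as num_categories
+from dfs.yelp.`yelp_academic_dataset_business.json` limit 3;
+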
+## Query Partitioned Directories
+
+You can use special variables in Drill to refer to subdirectories in your
+workspace path:
+
+  * dir0
+  * dir1
+  * …
+
+Note that these variables are dynamically determined based on the partitioning
+of the file system. No up-front definitions are required on what partitions
+exist. Here is a visual example of how this works:
+
+![drill query flow]({{ site.baseurl }}/docs/img/example_query.png)
+
+### Set workspace to dfs.logs:
+
+0: jdbc:drill:> use dfs.logs;
++------------+---------------------------------------+
+|     ok     |                summary                |
++------------+---------------------------------------+
+| true       | Default schema changed to 'dfs.logs'  |
++------------+---------------------------------------+
+
+### Query logs data for a specific year:
+
+0: jdbc:drill:> select * from logs where dir0='2013' limit 10;
+
++------+------+----------+------------+----------+---------+--------+-------+---------+----------+---------+------------+
+| dir0 | dir1 | trans_id |    date    |   time   | cust_id | device | state | camp_id | keywords | prod_id | purch_flag |
++------+------+----------+------------+----------+---------+--------+-------+---------+----------+---------+------------+
+| 2013 | 11   | 12119    | 11/09/2013 | 02:24:51 | 262     | IOS5   | ny    | 0       | chamber  | 198     | false      |
+| 2013 | 11   | 12120    | 11/19/2013 | 09:37:43 | 0       | AOS4.4 | il    | 2       | outside  | 511     | false      |
+| 2013 | 11   | 12134    | 11/10/2013 | 23:42:47 | 60343   | IOS5   | ma    | 4       | and      | 421     | false      |
+| 2013 | 11   | 12135    | 11/16/2013 | 01:42:13 | 46762   | AOS4.3 | ca    | 4       | here's   | 349     | false      |
+| 2013 | 11   | 12165    | 11/26/2013 | 21:58:09 | 41987   | AOS4.2 | mn    | 4       | he       | 271     | false      |
+| 2013 | 11   | 12168    | 11/09/2013 | 23:41:48 | 8600    | IOS5   | in    | 6       | i        | 459     | false      |
+| 2013 | 11   | 12196    | 11/20/2013 | 02:23:06 | 15603   | IOS5   | tn    | 1       | like     | 324     | false      |
+| 2013 | 11   | 12203    | 11/25/2013 | 23:50:29 | 221     | IOS6   | tx    | 10      | if       | 323     | false      |
+| 2013 | 11   | 12206    | 11/09/2013 | 23:53:01 | 2488    | AOS4.2 | tx    | 14      | unlike   | 296     | false      |
+| 2013 | 11   | 12217    | 11/06/2013 | 23:51:56 | 0       | AOS4.2 | tx    | 9       | can't    | 54      | false      |
++------+------+----------+------------+----------+---------+--------+-------+---------+----------+---------+------------+
+
+
+This query constrains files inside the subdirectory named 2013. The variable
+dir0 refers to the first level down from logs, dir1 to the next level, and so
+on. So this query returned 10 of the rows for November 2013.
+
+### Further constrain the results using multiple predicates in the query:
+
+This query returns a list of customer IDs for people who made a purchase via
+an IOS5 device in August 2013.
+
+0: jdbc:drill:> select dir0 as yr, dir1 as mth, cust_id from logs
+where dir0='2013' and dir1='8' and device='IOS5' and purch_flag='true'
+order by `date`;
++------+------+---------+
+|  yr  | mth  | cust_id |
++------+------+---------+
+| 2013 | 8    | 4       |
+| 2013 | 8    | 521     |
+| 2013 | 8    | 1       |
+| 2013 | 8    | 2       |
+| 2013 | 8    | 4       |
+| 2013 | 8    | 549     |
+| 2013 | 8    | 72827   |
+| 2013 | 8    | 38127   |
+...
+
+### Return monthly counts per customer for a given year:
+
+0: jdbc:drill:> select cust_id, dir1 month_no, count(*) month_count from logs
+where dir0=2014 group by cust_id, dir1 order by cust_id, month_no limit 10;
++------------+------------+-------------+
+|  cust_id   |  month_no  | month_count |
++------------+------------+-------------+
+| 0          | 1          | 143         |
+| 0          | 2          | 118         |
+| 0          | 3

[08/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/manage/004-partition-prune.md
--
diff --git a/_docs/drill-docs/manage/004-partition-prune.md 
b/_docs/drill-docs/manage/004-partition-prune.md
deleted file mode 100644
index fa81034..000
--- a/_docs/drill-docs/manage/004-partition-prune.md
+++ /dev/null
@@ -1,75 +0,0 @@

-title: "Partition Pruning"
-parent: "Manage Drill"

-Partition pruning is a performance optimization that limits the number of
-files and partitions that Drill reads when querying file systems and Hive
-tables. Drill only reads a subset of the files that reside in a file system or
-a subset of the partitions in a Hive table when a query matches certain filter
-criteria.
-
-For Drill to apply partition pruning to Hive tables, you must have created the
-tables in Hive using the `PARTITIONED BY` clause:
-
-`CREATE TABLE <table_name> (<column definitions>) PARTITIONED BY (<partition columns>);`
-
-When you create Hive tables using the `PARTITIONED BY` clause, each partition of
-data is automatically split out into different directories as data is written
-to disk. For more information about Hive partitioning, refer to the [Apache
-Hive wiki](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL/#LanguageManualDDL-PartitionedTables).
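-
-For example, an illustrative Hive DDL sketch (table and column names are
-assumed, not taken from these docs) for a log table partitioned by date:
-
-CREATE TABLE logs (trans_id INT, cust_id INT)
-PARTITIONED BY (log_date STRING);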
-
-Typically, table data in a file system is organized by directories and
-subdirectories. Queries on table data may contain `WHERE` clause filters on
-specific directories.
-
-Drill’s query planner evaluates the filters as part of a Filter operator. If
-no partition filters are present, the underlying Scan operator reads all files
-in all directories and then sends the data to operators downstream, such as
-Filter.
-
-When partition filters are present, the query planner determines if it can
-push the filters down to the Scan such that the Scan only reads the
-directories that match the partition filters, thus reducing disk I/O.
-
-## Partition Pruning Example
-
-The `/Users/max/data/logs` directory in a file system contains subdirectories
-that span a few years.
-
-The following image shows the hierarchical structure of the `…/logs` directory
-and (sub) directories:
-
-![](../../img/54.png)
-
-The following query requests log file data for 2013 from the `…/logs`
-directory in the file system:
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 and dir0 = 2013 limit 2;
-
-If you run the `EXPLAIN PLAN` command for the query, you can see that the
-`…/logs` directory is filtered by the scan operator.
-
-EXPLAIN PLAN FOR SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 and dir0 = 2013 limit 2;
-
-The following image shows a portion of the physical plan when partition
-pruning is applied:
-
-![](../../img/21.png)
-
-## Filter Examples
-
-The following queries include examples of the types of filters eligible for
-partition pruning optimization:
-
-**Example 1: Partition filters ANDed together**
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE dir0 = '2014' AND dir1 = '1'
-
-**Example 2: Partition filter ANDed with regular column filter**
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 AND dir0 = 2013 limit 2;
-
-**Example 3: Combination of AND, OR involving partition filters**
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE (dir0 = '2013' AND dir1 = '1') OR (dir0 = '2014' AND dir1 = '2')
-

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/manage/005-monitor-cancel.md
--
diff --git a/_docs/drill-docs/manage/005-monitor-cancel.md 
b/_docs/drill-docs/manage/005-monitor-cancel.md
deleted file mode 100644
index 6888eea..000
--- a/_docs/drill-docs/manage/005-monitor-cancel.md
+++ /dev/null
@@ -1,30 +0,0 @@

-title: "Monitoring and Canceling Queries in the Drill Web UI"
-parent: "Manage Drill"

-You can monitor and cancel queries from the Drill Web UI. To access the Drill
-Web UI, the Drillbit process must be running on the Drill node that you use to
-access the Drill Web UI.
-
-To monitor or cancel a query from the Drill Web UI, complete the following
-steps:
-
-  1. Navigate to the Drill Web UI at `<IP address of the Drillbit node>:8047`.  
-When you access the Drill Web UI, you see some general information about Drill
-running in your cluster, such as the nodes running the Drillbit process, the
-various ports Drill is using, and the amount of direct memory assigned to
-Drill.  
-![](../../img/7.png)
-
-  2. Select **Profiles** in the toolbar. A list of running and completed queries appears. Drill assigns a query ID to each query and lists the Foreman node. The Foreman is the Drillbit node that receives the query from the client or application. The Foreman drives the entire query.  
-![](../../img/51.png)
-
-  3. Click the **Query ID** for the query that you want to monitor or cancel. The Query and Planning window appears.  
-![](../../img/4.png)
-
-  4. Selec

[10/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/016-bylaws.md
--
diff --git a/_docs/drill-docs/016-bylaws.md b/_docs/drill-docs/016-bylaws.md
deleted file mode 100644
index 6f2604f..000
--- a/_docs/drill-docs/016-bylaws.md
+++ /dev/null
@@ -1,171 +0,0 @@

-title: "Project Bylaws"
-parent: "Apache Drill Documentation"

-# Introduction
-
-This document defines the bylaws under which the Apache Drill project
-operates. It defines the roles and responsibilities of the project, who may
-vote, how voting works, how conflicts are resolved, etc.
-
-Drill is a project of the [Apache Software
-Foundation](http://www.apache.org/foundation/). The foundation holds the
-copyright on Apache code including the code in the Drill codebase. The
-[foundation FAQ](http://www.apache.org/foundation/faq.html) explains the
-operation and background of the foundation.
-
-Drill is typical of Apache projects in that it operates under a set of
-principles, known collectively as the _Apache Way_. If you are new to Apache
-development, please refer to the [Incubator
-project](http://incubator.apache.org/) for more information on how Apache
-projects operate.
-
-# Roles and Responsibilities
-
-Apache projects define a set of roles with associated rights and
-responsibilities. These roles govern what tasks an individual may perform
-within the project. The roles are defined in the following sections.
-
-## Users
-
-The most important participants in the project are people who use our
-software. The majority of our contributors start out as users and guide their
-development efforts from the user's perspective.
-
-Users contribute to the Apache projects by providing feedback to contributors
-in the form of bug reports and feature suggestions. As well, users participate
-in the Apache community by helping other users on mailing lists and user
-support forums.
-
-## Contributors
-
-All of the volunteers who are contributing time, code, documentation, or
-resources to the Drill Project. A contributor who makes sustained, welcome
-contributions to the project may be invited to become a committer, though the
-exact timing of such invitations depends on many factors.
-
-## Committers
-
-The project's committers are responsible for the project's technical
-management. Committers have access to a specified set of subprojects' code
-repositories. Committers on subprojects may cast binding votes on any
-technical discussion regarding that subproject.
-
-Committer access is by invitation only and must be approved by lazy consensus
-of the active PMC members. A Committer is considered _emeritus_ by his or her
-own declaration or by not contributing in any form to the project for over six
-months. An emeritus committer may request reinstatement of commit access from
-the PMC, which will be sufficient to restore him or her to active committer
-status.
-
-Commit access can be revoked by a unanimous vote of all the active PMC members
-(except the committer in question if he or she is also a PMC member).
-
-All Apache committers are required to have a signed [Contributor License
-Agreement (CLA)](http://www.apache.org/licenses/icla.txt) on file with the
-Apache Software Foundation. There is a [Committer
-FAQ](http://www.apache.org/dev/committers.html) which provides more details on
-the requirements for committers.
-
-A committer who makes a sustained contribution to the project may be invited
-to become a member of the PMC. The form of contribution is not limited to
-code. It can also include code review, helping out users on the mailing lists,
-documentation, etc.
-
-## Project Management Committee
-
-The PMC is responsible to the board and the ASF for the management and
-oversight of the Apache Drill codebase. The responsibilities of the PMC
-include
-
-  * Deciding what is distributed as products of the Apache Drill project. In 
particular all releases must be approved by the PMC.
-  * Maintaining the project's shared resources, including the codebase 
repository, mailing lists, websites.
-  * Speaking on behalf of the project.
-  * Resolving license disputes regarding products of the project.
-  * Nominating new PMC members and committers.
-  * Maintaining these bylaws and other guidelines of the project.
-
-Membership of the PMC is by invitation only and must be approved by a lazy
-consensus of active PMC members. A PMC member is considered _emeritus_ by his
-or her own declaration or by not contributing in any form to the project for
-over six months. An emeritus member may request reinstatement to the PMC,
-which will be sufficient to restore him or her to active PMC membership.
-
-Membership of the PMC can be revoked by a unanimous vote of all the active
-PMC members other than the member in question.
-
-The chair of the PMC is appointed by the ASF board. The chair is an office
-holder of the Apache Software Foundation (Vice President, Apache D

[04/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread adi
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/interfaces/odbc-win/002-conf-odbc-win.md
--
diff --git a/_docs/interfaces/odbc-win/002-conf-odbc-win.md 
b/_docs/interfaces/odbc-win/002-conf-odbc-win.md
new file mode 100644
index 000..636bd9f
--- /dev/null
+++ b/_docs/interfaces/odbc-win/002-conf-odbc-win.md
@@ -0,0 +1,143 @@
+---
+title: "Step 2. Configure ODBC Connections to Drill Data Sources"
+parent: "Using the MapR ODBC Driver on Windows"
+---
+Complete one of the following steps to create an ODBC connection to Drill data
+sources:
+
+  * Create a Data Source Name
+  * Create an ODBC Connection String
+
+**Prerequisite:** An Apache Drill installation must be available that is configured to access the data sources that you want to connect to. For information about how to install Apache Drill, see [Install Drill](/drill/docs/install-drill). For information about configuring data sources, see the [Apache Drill documentation](/drill/docs).
+
+## Create a Data Source Name (DSN)
+
+Create a DSN that an application can use to connect to Drill data sources. If
+you want to create a DSN for a 32-bit application, you must use the 32-bit
+version of the ODBC Administrator to create the DSN.
+
+  1. To launch the ODBC Administrator, click **Start > All Programs > MapR Drill ODBC Driver 1.0 (32|64-bit) > (32|64-bit) ODBC Administrator**.  
+The ODBC Data Source Administrator window appears.
+
+ To launch the 32-bit version of the ODBC Administrator on a 64-bit machine, run:
+`C:\WINDOWS\SysWOW64\odbcad32.exe`.
+  2. Click the **System DSN** tab to create a system DSN or click the **User DSN** tab to create a user DSN. A system DSN is available for all users who log in to the machine. A user DSN is available to the user who creates the DSN.
+  3. Click **Add**.
+  4. Select **MapR Drill ODBC Driver** and click **Finish**.  
+ The _MapR Drill ODBC Driver DSN Setup_ window appears.
+  5. In the **Data Source Name** field, enter a name for the DSN.
+  6. Optionally, enter a description of the DSN in the Description field.
+  7. In the Connection Type section, select a connection type and enter the associated connection details:
+
+     | Connection Type | Properties | Descriptions |
+     | --- | --- | --- |
+     | Zookeeper Quorum | Quorum | A comma-separated list of servers in a Zookeeper cluster. For example, `<host>:5181,<host>:5181,…` |
+     | | ClusterID | Name of the drillbit cluster. The default is drillbits1. You may need to specify a different value if the cluster ID was changed in the drill-override.conf file. |
+     | Direct to Drillbit | | Provide the IP address or host name of the Drill server and the port number that the Drill server is listening on. The port number defaults to 31010. You may need to specify a different value if the port number was changed in the drill-override.conf file. |
+ For information on selecting the appropriate connection type, see [Connection Types](/drill/docs/step-2-configure-odbc-connections-to-drill-data-sources#connection-type).
+  8. In the **Default Schema** field, select the default schema that you want to connect to.
+ For more information about the schemas that appear in this list, see Schemas.
+  9. Optionally, perform one of the following operations:
+
+     | Option | Action |
+     | --- | --- |
+     | Update the configuration of the advanced properties. | Edit the default values in the Advanced Properties section. For more information, see Advanced Properties. |
+     | Configure the types of events that you want the driver to log. | Click **Logging Options**. For more information, see Logging Options. |
+     | Create views or explore Drill sources. | Click **Drill Explorer**. For more information, see Using Drill Explorer to Browse Data and Create Views. |
+  10. Click **OK** to save the DSN.
+
+## Configuration Options
+
+### Connection Type
+
+ODBC can connect directly to a Drillbit or to a ZooKeeper Quorum. Select your
+connection type based on your environment and Drillbit configuration.
+
+The following table lists the appropriate connection type for each scenario:
+
+| Scenario | Connection Type |
+| --- | --- |
+| Drillbit is running in embedded mode. | Direct to Drillbit |
+| Drillbit is registered with the ZooKeeper in a testing environment. | ZooKeeper Quorum or Direct to Drillbit |
+| Drillbit is registered with the ZooKeeper in a production environment. | ZooKeeper Quorum |
+
+### Connection to Zookeeper Quorum
+
+When you choose to connect to a ZooKeeper Quorum, the ODBC driver connects to
+the ZooKeeper Quorum to get a list of available Drillbits in the specified
+cluster. Then, the ODBC driver submits a query after selecting a Drillbit. All
+Drillbits in the cluster process the query and the Drillbit that received the
+query returns the query results.
+
+![ODBC to Quorum]({{ site.baseurl }}/docs/img/ODBC_to_Quorum.png)
+
+In a production environment, you should connect to a ZooKeeper Quorum for a
+more reliable connection. If one Drillbit is not available, another Drillbit
+that is 

[09/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/datasets/001-aol.md
--
diff --git a/_docs/drill-docs/datasets/001-aol.md 
b/_docs/drill-docs/datasets/001-aol.md
deleted file mode 100644
index 472f52f..000
--- a/_docs/drill-docs/datasets/001-aol.md
+++ /dev/null
@@ -1,47 +0,0 @@

-title: "AOL Search"
-parent: "Sample Datasets"

-## Quick Stats
-
-The [AOL Search dataset](http://en.wikipedia.org/wiki/AOL_search_data_leak) is
-a collection of real query log data that is based on real users.
-
-## The Data Source
-
-The dataset consists of 20M Web queries from 650k users over a period of three
-months, 440MB in total and available [for
-download](http://zola.di.unipi.it/smalltext/datasets.html). The format used in
-the dataset is:
-
-AnonID, Query, QueryTime, ItemRank, ClickURL
-
-... with:
-
-  * AnonID, an anonymous user ID number.
-  * Query, the query issued by the user, case shifted with most punctuation 
removed.
-  * QueryTime, the time at which the query was submitted for search.
-  * ItemRank, if the user clicked on a search result, the rank of the item on 
which they clicked is listed.
-  * [ClickURL](http://www.dietkart.com/), if the user clicked on a search 
result, the domain portion of the URL in the clicked result is listed.
-
-Each line in the data represents one of two types of events
-
-  * A query that was NOT followed by the user clicking on a result item.
-  * A click through on an item in the result list returned from a query.
-
-In the first case (query only) there is data in only the first three columns,
-in the second case (click through), there is data in all five columns. For
-click through events, the query that preceded the click through is included.
-Note that if a user clicked on more than one result in the list returned from
-a single query, there will be TWO lines in the data to represent the two
-events.
-
-## The Queries
-
-Interesting queries, for example
-
-  * Users querying for topic X
-  * Users that click on the first (second, third) ranked item
-  * TOP 10 domains searched
-  * TOP 10 domains clicked at
-

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/datasets/002-enron.md
--
diff --git a/_docs/drill-docs/datasets/002-enron.md 
b/_docs/drill-docs/datasets/002-enron.md
deleted file mode 100644
index 2ddbef6..000
--- a/_docs/drill-docs/datasets/002-enron.md
+++ /dev/null
@@ -1,21 +0,0 @@

-title: "Enron Emails"
-parent: "Sample Datasets"

-## Quick Stats
-
-The [Enron Email dataset](http://www.cs.cmu.edu/~enron/) contains data from
-about 150 users, mostly senior management of Enron.
-
-## The Data Source
-
-Totalling some 500,000 messages, the [raw
-data](http://www.cs.cmu.edu/~enron/enron_mail_20110402.tgz) (2009 version of
-the dataset; ~423MB) is available for download as well as a [MySQL
-dump](ftp://ftp.isi.edu/sims/philpot/data/enron-mysqldump.sql.gz) (~177MB).
-
-## The Queries
-
-Interesting queries, for example
-
-  * Via [Query Dataset for Email 
Search](https://dbappserv.cis.upenn.edu/spell/)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/datasets/003-wikipedia.md
--
diff --git a/_docs/drill-docs/datasets/003-wikipedia.md 
b/_docs/drill-docs/datasets/003-wikipedia.md
deleted file mode 100644
index 99e6e24..000
--- a/_docs/drill-docs/datasets/003-wikipedia.md
+++ /dev/null
@@ -1,105 +0,0 @@

-title: "Wikipedia Edit History"
-parent: "Sample Datasets"

-# Quick Stats
-
-The Wikipedia Edit History is a public dump of the website made available by
-the wikipedia foundation. You can find details
-[here](http://en.wikipedia.org/wiki/Wikipedia:Database_download). The dumps
-are made available as SQL or XML dumps. You can find the entire schema drawn
-together in this great [diagram](http://upload.wikimedia.org/wikipedia/commons
-/thumb/4/42/MediaWiki_1.20_%2844edaa2%29_database_schema.svg/2193px-
-MediaWiki_1.20_%2844edaa2%29_database_schema.svg.png).
-
-# Approach
-
-The _main_ distribution files are:
-
-  * Current Pages: As of January 2013 this SQL dump was 9.0GB in its 
compressed format.
-  * Complere Archive: This is what we actually want, but at a size of multiple 
terrabytes, clearly exceeds the storage available at home.
-
-To have some real historic data, it is recommended to download a _Special
-Export_ use this
-[link](http://en.wikipedia.org/w/index.php?title=Special:Export). Using this
-tool you generate a category specific XML dump and configure various export
-options. There are some limits like a maximum of 1000 revisions per export,
-but otherwise this should work out just fine.
-
-![](../../img/Overview.png)
-
-The entities used in the query use cases.
-
-# Use Cases
-
-## Select Change Volume 

[07/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/query/query-fs/001-query-json.md
--
diff --git a/_docs/drill-docs/query/query-fs/001-query-json.md 
b/_docs/drill-docs/query/query-fs/001-query-json.md
deleted file mode 100644
index 048903b..000
--- a/_docs/drill-docs/query/query-fs/001-query-json.md
+++ /dev/null
@@ -1,41 +0,0 @@

-title: "Querying JSON Files"
-parent: "Querying a File System"

-Your Drill installation includes a sample JSON file located in Drill's
-classpath. The sample JSON file, `employee.json`, contains fictitious employee
-data. Use SQL syntax to query the sample `JSON` file.
-
-To view the data in the `employee.json` file, submit the following SQL query
-to Drill:
-
-``0: jdbc:drill:zk=local> SELECT * FROM cp.`employee.json`;``
-
-The query returns the following results:
-
-**Example of partial output**
-
-
+-++++-+---+
-| employee_id | full_name  | first_name | last_name  | position_id | 
position_ |
-
+-++++-+---+
-| 1101| Steve Eurich | Steve  | Eurich | 16  | 
Store T |
-| 1102| Mary Pierson | Mary   | Pierson| 16  | 
Store T |
-| 1103| Leo Jones  | Leo| Jones  | 16  | Store 
Tem |
-| 1104| Nancy Beatty | Nancy  | Beatty | 16  | 
Store T |
-| 1105| Clara McNight | Clara  | McNight| 16  | 
Store  |
-| 1106| Marcella Isaacs | Marcella   | Isaacs | 17  | 
Stor |
-| 1107| Charlotte Yonce | Charlotte  | Yonce  | 17  | 
Stor |
-| 1108| Benjamin Foster | Benjamin   | Foster | 17  | 
Stor |
-| 1109| John Reed  | John   | Reed   | 17  | Store 
Per |
-| 1110| Lynn Kwiatkowski | Lynn   | Kwiatkowski | 17  
| St |
-| | Donald Vann | Donald | Vann   | 17  | 
Store Pe |
-| 1112| William Smith | William| Smith  | 17  | 
Store  |
-| 1113| Amy Hensley | Amy| Hensley| 17  | 
Store Pe |
-| 1114| Judy Owens | Judy   | Owens  | 17  | Store 
Per |
-| 1115| Frederick Castillo | Frederick  | Castillo   | 17  
| S |
-| 1116| Phil Munoz | Phil   | Munoz  | 17  | Store 
Per |
-| 1117| Lori Lightfoot | Lori   | Lightfoot  | 17  | 
Store |
-...
-
+-++++-+---+
-1,155 rows selected (0.762 seconds)
-0: jdbc:drill:zk=local>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/query/query-fs/002-query-parquet.md
--
diff --git a/_docs/drill-docs/query/query-fs/002-query-parquet.md 
b/_docs/drill-docs/query/query-fs/002-query-parquet.md
deleted file mode 100644
index 9b4e874..000
--- a/_docs/drill-docs/query/query-fs/002-query-parquet.md
+++ /dev/null
@@ -1,99 +0,0 @@

-title: "Querying Parquet Files"
-parent: "Querying a File System"

-Your Drill installation includes a `sample-date` directory with Parquet files
-that you can query. Use SQL syntax to query the `region.parquet` and
-`nation.parquet` files in the `sample-data` directory.
-
-**Note:** Your Drill installation location may differ from the examples used 
here. The examples assume that Drill was installed in embedded mode on your 
machine following the [Apache Drill in 10 Minutes 
](https://cwiki.apache.org/confluence/display/DRILL/Apache+Drill+in+10+Minutes)tutorial.
 If you installed Drill in distributed mode, or your `sample-data` directory 
differs from the location used in the examples, make sure to change the 
`sample-data` directory to the correct location before you run the queries.
-
- Region File
-
-If you followed the Apache Drill in 10 Minutes instructions to install Drill
-in embedded mode, the path to the parquet file varies between operating
-systems.
-
-To view the data in the `region.parquet` file, issue the query appropriate for
-your operating system:
-
-  * Linux  
-``SELECT * FROM dfs.`/opt/drill/apache-drill-0.4.0-incubating/sample-
-data/region.parquet`; ``
-
-   * Mac OS X  
-``SELECT * FROM dfs.`/Users/max/drill/apache-drill-0.4.0-incubating/sample-
-data/region.parquet`;``
-
-   * Windows  
-``SELECT * FROM dfs.`C:\drill\apache-drill-0.4.0-incubating\sample-
-data\region.parquet`;``
-
-The query returns the following results:
-
-+++
-|   EXPR$0   |   EXPR$1   |
-+++
-| AFRICA | lar deposits. blithely final packages cajole. regular 
waters ar |
-

[13/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
DRILL-2315: Confluence conversion plus fixes


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/d959a210
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/d959a210
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/d959a210

Branch: refs/heads/gh-pages
Commit: d959a210053f02b5069f0a0cb9f0d34131640ffb
Parents: 23f82db
Author: Kristine Hahn 
Authored: Thu Jan 15 19:42:12 2015 -0800
Committer: Bridget Bevens 
Committed: Wed Feb 25 16:22:24 2015 -0800

--
 .gitignore  |   1 +
 _docs/001-arch.md   |  49 +++
 _docs/001-drill-docs.md |   4 -
 _docs/002-tutorial.md   |  51 +++
 _docs/003-yelp.md   | 412 ++
 _docs/004-install.md|  13 +
 _docs/005-connect.md|  41 ++
 _docs/006-interfaces.md |  50 +++
 _docs/007-query.md  |  41 ++
 _docs/008-sql-ref.md|  14 +
 _docs/009-dev-custom-func.md|  37 ++
 _docs/010-manage.md |  14 +
 _docs/011-develop.md|   9 +
 _docs/012-rn.md | 191 +
 _docs/013-contribute.md |   9 +
 _docs/014-sample-ds.md  |  10 +
 _docs/015-design.md |  13 +
 _docs/016-progress.md   |   8 +
 _docs/017-archived-pages.md |   8 +
 _docs/018-bylaws.md | 170 
 _docs/arch/001-core-mod.md  |  29 ++
 _docs/arch/002-arch-hilite.md   |  10 +
 _docs/arch/arch-hilite/001-flexibility.md   |  78 
 _docs/arch/arch-hilite/002-performance.md   |  55 +++
 _docs/archive/001-how-to-demo.md| 309 ++
 _docs/archive/002-meet-drill.md |  41 ++
 _docs/connect/001-plugin-reg.md |  35 ++
 _docs/connect/002-workspaces.md |  74 
 _docs/connect/003-reg-fs.md |  64 +++
 _docs/connect/004-reg-hbase.md  |  32 ++
 _docs/connect/005-reg-hive.md   |  83 
 _docs/connect/006-default-frmt.md   |  60 +++
 _docs/connect/007-mongo-plugin.md   | 167 
 _docs/connect/008-mapr-db-plugin.md |  31 ++
 _docs/contribute/001-guidelines.md  | 229 ++
 _docs/contribute/002-ideas.md   | 158 +++
 _docs/datasets/001-aol.md   |  47 +++
 _docs/datasets/002-enron.md |  19 +
 _docs/datasets/003-wikipedia.md | 105 +
 _docs/design/001-plan.md|  25 ++
 _docs/design/002-rpc.md |  19 +
 _docs/design/003-query-stages.md|  42 ++
 _docs/design/004-research.md|  48 +++
 _docs/design/005-value.md   | 163 +++
 _docs/dev-custom-fcn/001-dev-simple.md  |  50 +++
 _docs/dev-custom-fcn/002-dev-aggregate.md   |  55 +++
 _docs/dev-custom-fcn/003-add-custom.md  |  26 ++
 _docs/dev-custom-fcn/004-use-custom.md  |  55 +++
 _docs/dev-custom-fcn/005-cust-interface.md  |   8 +
 _docs/develop/001-compile.md|  37 ++
 _docs/develop/002-setup.md  |   5 +
 _docs/develop/003-patch-tool.md | 160 +++
 _docs/drill-docs/001-arch.md|  58 ---
 _docs/drill-docs/002-tutorial.md|  58 ---
 _docs/drill-docs/003-yelp.md| 402 --
 _docs/drill-docs/004-install.md |  20 -
 _docs/drill-docs/005-connect.md |  49 ---
 _docs/drill-docs/006-query.md   |  57 ---
 _docs/drill-docs/006-sql-ref.md |  25 --
 _docs/drill-docs/007-dev-custom-func.md |  47 ---
 _docs/drill-docs/008-manage.md  |  23 -
 _docs/drill-docs/009-develop.md |  16 -
 _docs/drill-docs/010-rn.md  | 192 -
 _docs/drill-docs/011-contribute.md  |  11 -
 _docs/drill-docs/012-sample-ds.md   |  11 -
 _docs/drill-docs/013-design.md  |  14 -
 _docs/drill-docs/014-progress.md|   9 -
 _docs/drill-docs/015-archived-pages.md  |   9 -
 _docs/drill-docs/016-bylaws.md  | 171 
 _docs/drill-docs/arch/001-core-mod.md   |  30 --
 _docs/drill-docs/arch/002-arch-hilite.md|  15 -
 .../arch/arch-hilite/001-flexibility.md |  79 
 .../arch/arch-hilite/002-performance.md |  56 ---
 _docs/drill-docs/archive/001-how-to-demo.md | 309 ---

[01/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
Repository: drill
Updated Branches:
  refs/heads/gh-pages 23f82db9f -> d959a2100


http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/tutorial/005-lesson3.md
--
diff --git a/_docs/tutorial/005-lesson3.md b/_docs/tutorial/005-lesson3.md
new file mode 100644
index 000..f6c7ae4
--- /dev/null
+++ b/_docs/tutorial/005-lesson3.md
@@ -0,0 +1,379 @@
+---
+title: "Lession 3: Run Queries on Complex Data Types"
+parent: "Apache Drill Tutorial"
+---
+## Goal
+
+This lesson focuses on queries that exercise functions and operators on self-
+describing data and complex data types. Drill offers intuitive SQL extensions
+to work with such data and offers high query performance with an architecture
+built from the ground up for complex data.
+
+## Queries in This Lesson
+
+Now that you have run ANSI SQL queries against different tables and files with
+relational data, you can try some examples including complex types.
+
+  * Access directories and subdirectories of files in a single SELECT statement.
+  * Demonstrate simple ways to access complex data in JSON files.
+  * Demonstrate the repeated_count function to aggregate values in an array.
+
+## Query Partitioned Directories
+
+You can use special variables in Drill to refer to subdirectories in your
+workspace path:
+
+  * dir0
+  * dir1
+  * …
+
+Note that these variables are dynamically determined based on the partitioning
+of the file system. No up-front definitions are required on what partitions
+exist. Here is a visual example of how this works:
+
+![drill query flow]({{ site.baseurl }}/docs/img/example_query.png)
+
+### Set workspace to dfs.logs:
+
+0: jdbc:drill:> use dfs.logs;
++------------+---------------------------------------+
+|     ok     |                summary                |
++------------+---------------------------------------+
+| true       | Default schema changed to 'dfs.logs'  |
++------------+---------------------------------------+
+
+### Query logs data for a specific year:
+
+0: jdbc:drill:> select * from logs where dir0='2013' limit 10;
+
++------+------+----------+------------+----------+---------+--------+-------+---------+----------+---------+------------+
+| dir0 | dir1 | trans_id |    date    |   time   | cust_id | device | state | camp_id | keywords | prod_id | purch_flag |
++------+------+----------+------------+----------+---------+--------+-------+---------+----------+---------+------------+
+| 2013 | 11   | 12119    | 11/09/2013 | 02:24:51 | 262     | IOS5   | ny    | 0       | chamber  | 198     | false      |
+| 2013 | 11   | 12120    | 11/19/2013 | 09:37:43 | 0       | AOS4.4 | il    | 2       | outside  | 511     | false      |
+| 2013 | 11   | 12134    | 11/10/2013 | 23:42:47 | 60343   | IOS5   | ma    | 4       | and      | 421     | false      |
+| 2013 | 11   | 12135    | 11/16/2013 | 01:42:13 | 46762   | AOS4.3 | ca    | 4       | here's   | 349     | false      |
+| 2013 | 11   | 12165    | 11/26/2013 | 21:58:09 | 41987   | AOS4.2 | mn    | 4       | he       | 271     | false      |
+| 2013 | 11   | 12168    | 11/09/2013 | 23:41:48 | 8600    | IOS5   | in    | 6       | i        | 459     | false      |
+| 2013 | 11   | 12196    | 11/20/2013 | 02:23:06 | 15603   | IOS5   | tn    | 1       | like     | 324     | false      |
+| 2013 | 11   | 12203    | 11/25/2013 | 23:50:29 | 221     | IOS6   | tx    | 10      | if       | 323     | false      |
+| 2013 | 11   | 12206    | 11/09/2013 | 23:53:01 | 2488    | AOS4.2 | tx    | 14      | unlike   | 296     | false      |
+| 2013 | 11   | 12217    | 11/06/2013 | 23:51:56 | 0       | AOS4.2 | tx    | 9       | can't    | 54      | false      |
++------+------+----------+------------+----------+---------+--------+-------+---------+----------+---------+------------+
+
+
+This query constrains files inside the subdirectory named 2013. The variable
+dir0 refers to the first level down from logs, dir1 to the next level, and so
+on. So this query returned 10 of the rows for November 2013.
+
+### Further constrain the results using multiple predicates in the query:
+
+This query returns a list of customer IDs for people who made a purchase via
+an IOS5 device in August 2013.
+
+0: jdbc:drill:> select dir0 as yr, dir1 as mth, cust_id from logs
+where dir0='2013' and dir1='8' and device='IOS5' and purch_flag='true'
+order by `date`;
++--------+-------+----------+
+|   yr   |  mth  | cust_id  |
++--------+-------+----------+
+| 2013   | 8     | 4        |
+| 2013   | 8     | 521      |
+| 2013   | 8     | 1        |
+| 2013   | 8     | 2        |
+| 2013   | 8     | 4        |
+| 2013   | 8     | 549      |
+| 2013   | 8     | 72827    |
+| 2013   | 8     | 38127    |
+...
+
+### Return monthly counts per customer for a given year:
+
+0: jdbc:drill:> select cust_id, dir1 month_no, count(*) month_count from logs
+where dir0=2014 group by cust_id, dir1 order by cust_id, month_no limit 10;
++------------+------------+-------------+
+|  cust_id   |  month_no  | month_count |
++------------+------------+-------------+
+| 0          | 1          | 143         |
+| 0          | 2          | 118         |
+| 0  | 3   

[03/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/query/005-query-info-skema.md
--
diff --git a/_docs/query/005-query-info-skema.md 
b/_docs/query/005-query-info-skema.md
new file mode 100644
index 000..1ad0008
--- /dev/null
+++ b/_docs/query/005-query-info-skema.md
@@ -0,0 +1,109 @@
+---
+title: "Querying the INFORMATION SCHEMA"
+parent: "Query Data"
+---
+When you are using Drill to connect to multiple data sources, you need a
+simple mechanism to discover what each data source contains. The information
+schema is an ANSI standard set of metadata tables that you can query to return
+information about all of your Drill data sources (or schemas). Data sources
+may be databases or file systems; they are all known as "schemas" in this
+context. You can query the following INFORMATION_SCHEMA tables:
+
+  * SCHEMATA
+  * CATALOGS
+  * TABLES
+  * COLUMNS 
+  * VIEWS
+
+## SCHEMATA
+
+The SCHEMATA table contains the CATALOG_NAME and SCHEMA_NAME columns. To allow
+maximum flexibility inside BI tools, the only catalog that Drill supports is
+`DRILL`.
+
+0: jdbc:drill:zk=local> select CATALOG_NAME, SCHEMA_NAME as all_my_data_sources
+from INFORMATION_SCHEMA.SCHEMATA order by SCHEMA_NAME;
++--------------+---------------------+
+| CATALOG_NAME | all_my_data_sources |
++--------------+---------------------+
+| DRILL        | INFORMATION_SCHEMA  |
+| DRILL        | cp.default          |
+| DRILL        | dfs.default         |
+| DRILL        | dfs.root            |
+| DRILL        | dfs.tmp             |
+| DRILL        | HiveTest.SalesDB    |
+| DRILL        | maprfs.logs         |
+| DRILL        | sys                 |
++--------------+---------------------+
+
+The INFORMATION_SCHEMA name and associated keywords are case-sensitive. You
+can also return a list of schemas by running the SHOW DATABASES command:
+
+0: jdbc:drill:zk=local> show databases;
++-------------+
+| SCHEMA_NAME |
++-------------+
+| dfs.default |
+| dfs.root    |
+| dfs.tmp     |
+...
+
+## CATALOGS
+
+The CATALOGS table returns only one row, with the hardcoded DRILL catalog name
+and description.
+
+## TABLES
+
+The TABLES table returns the table name and type for each table or view in
+your databases. (Type means TABLE or VIEW.) Note that Drill does not return
+files available for querying in file-based data sources. Instead, use SHOW
+FILES to explore these data sources.
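+
+For example, a query along these lines (the schema name here is only
+illustrative) lists everything registered in a single schema:
+
+SELECT TABLE_NAME, TABLE_TYPE
+FROM INFORMATION_SCHEMA.`TABLES`
+WHERE TABLE_SCHEMA = 'dfs.tmp';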
+
+## COLUMNS
+
+The COLUMNS table returns the column name and other metadata (such as the data
+type) for each column in each table or view.
+
+## VIEWS
+
+The VIEWS table returns the name and definition for each view in your
+databases. Note that file schemas are the canonical repository for views in
+Drill. Depending on how you create a view, the view may only be displayed in
+Drill after it has been used.
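+
+As a sketch (assuming the ANSI-standard VIEW_DEFINITION column), you can list
+each view together with its defining SQL:
+
+SELECT TABLE_SCHEMA, TABLE_NAME, VIEW_DEFINITION
+FROM INFORMATION_SCHEMA.VIEWS;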
+
+## Useful Queries
+
+Run an ``INFORMATION_SCHEMA.`TABLES` `` query to view all of the tables and
+views within a database. TABLES is a reserved word in Drill and requires back
+ticks (`).
+
+For example, the following query identifies all of the tables and views that
+Drill can access:
+
+SELECT TABLE_SCHEMA, TABLE_NAME, TABLE_TYPE
+FROM INFORMATION_SCHEMA.`TABLES`
+ORDER BY TABLE_NAME DESC;
+
+TABLE_SCHEMA          TABLE_NAME             TABLE_TYPE
+
+HiveTest.CustomersDB  Customers              TABLE
+HiveTest.SalesDB      Orders                 TABLE
+HiveTest.SalesDB      OrderLines             TABLE
+HiveTest.SalesDB      USOrders               VIEW
+dfs.default           CustomerSocialProfile  VIEW
+
+
+**Note:** Currently, Drill only supports querying Drill views; Hive views are
+not yet supported.
+
+You can run a similar query to identify columns in tables and the data types
+of those columns:
+
+SELECT COLUMN_NAME, DATA_TYPE 
+FROM INFORMATION_SCHEMA.COLUMNS 
+WHERE TABLE_NAME = 'Orders' AND TABLE_SCHEMA = 'HiveTest.SalesDB'
+AND COLUMN_NAME LIKE '%Total';
++-------------+------------+
+| COLUMN_NAME | DATA_TYPE  |
++-------------+------------+
+| OrderTotal  | Decimal    |
++-------------+------------+
+

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/query/006-query-sys-tbl.md
--
diff --git a/_docs/query/006-query-sys-tbl.md b/_docs/query/006-query-sys-tbl.md
new file mode 100644
index 000..9b853ec
--- /dev/null
+++ b/_docs/query/006-query-sys-tbl.md
@@ -0,0 +1,159 @@
+---
+title: "Querying System Tables"
+parent: "Query Data"
+---
+Drill has a sys database that contains system tables. You can query the system
+tables for in

[04/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/interfaces/odbc-win/002-conf-odbc-win.md
--
diff --git a/_docs/interfaces/odbc-win/002-conf-odbc-win.md 
b/_docs/interfaces/odbc-win/002-conf-odbc-win.md
new file mode 100644
index 000..636bd9f
--- /dev/null
+++ b/_docs/interfaces/odbc-win/002-conf-odbc-win.md
@@ -0,0 +1,143 @@
+---
+title: "Step 2. Configure ODBC Connections to Drill Data Sources"
+parent: "Using the MapR ODBC Driver on Windows"
+---
+Complete one of the following steps to create an ODBC connection to Drill data
+sources:
+
+  * Create a Data Source Name
+  * Create an ODBC Connection String
+
+**Prerequisite:** An Apache Drill installation must be available that is 
configured to access the data sources that you want to connect to.  For 
information about how to install Apache Drill, see [Install 
Drill](/drill/docs/install-drill). For information about configuring data 
sources, see the [Apache Drill documentation](/drill/docs).
+
+## Create a Data Source Name (DSN)
+
+Create a DSN that an application can use to connect to Drill data sources. If
+you want to create a DSN for a 32-bit application, you must use the 32-bit
+version of the ODBC Administrator to create the DSN.
+
+  1. To launch the ODBC Administrator, click **Start > All Programs > MapR 
Drill ODBC Driver 1.0 (32|64-bit) > (32|64-bit) ODBC Administrator**.  
+The ODBC Data Source Administrator window appears.
+
+ To launch the 32-bit version of the ODBC driver on a 64-bit machine, run:
+`C:\WINDOWS\SysWOW64\odbcad32.exe`.
+  2. Click the **System DSN** tab to create a system DSN or click the **User 
DSN** tab to create a user DSN. A system DSN is available for all users who log 
in to the machine. A user DSN is available to the user who creates the DSN.
+  3. Click **Add**.
+  4. Select **MapR Drill ODBC Driver** and click **Finish**.  
+ The _MapR Drill ODBC Driver DSN Setup_ window appears.
+  5. In the **Data Source Name** field, enter a name for the DSN.
+  6. Optionally, enter a description of the DSN in the Description field.
+  7. In the Connection Type section, select a connection type and enter the 
associated connection details:
+
+ Connection Type|Properties|Descriptions
+ ---|---|---
+ Zookeeper Quorum|Quorum|A comma-separated list of servers in a Zookeeper cluster. For example, <host>:5181,<host>:5181,…
+ Zookeeper Quorum|ClusterID|Name of the drillbit cluster. The default is drillbits1. You may need to specify a different value if the cluster ID was changed in the drill-override.conf file.
+ Direct to Drillbit| |Provide the IP address or host name of the Drill server and the port number that the Drill server is listening on. The port number defaults to 31010. You may need to specify a different value if the port number was changed in the drill-override.conf file.
+ For information on selecting the appropriate connection type, see
+ [Connection Types](/drill/docs/step-2-configure-odbc-connections-to-drill-data-sources#connection-type).
+  8. In the **Default Schema** field, select the default schema that you want 
to connect to.
+ For more information about the schemas that appear in this list, see 
Schemas.
+  9. Optionally, perform one of the following operations:
+
+ Option|Action
+ ---|---
+ Update the configuration of the advanced properties.|Edit the default values in the Advanced Properties section. For more information, see Advanced Properties.
+ Configure the types of events that you want the driver to log.|Click Logging Options. For more information, see Logging Options.
+ Create views or explore Drill sources.|Click Drill Explorer. For more information, see Using Drill Explorer to Browse Data and Create Views.
+  10. Click **OK** to save the DSN.
+
+## Configuration Options
+
+### Connection Type
+
+ODBC can connect directly to a Drillbit or to a ZooKeeper Quorum. Select your
+connection type based on your environment and Drillbit configuration.
+
+The following table lists the appropriate connection type for each scenario:
+
+Scenario|Connection Type
+---|---
+Drillbit is running in embedded mode.|Direct to Drillbit
+Drillbit is registered with the ZooKeeper in a testing environment.|ZooKeeper Quorum or Direct to Drillbit
+Drillbit is registered with the ZooKeeper in a production environment.|ZooKeeper Quorum
+
+#### Connection to Zookeeper Quorum
+
+When you choose to connect to a ZooKeeper Quorum, the ODBC driver connects to
+the ZooKeeper Quorum to get a list of available Drillbits in the specified
+cluster. Then, the ODBC driver submits a query after selecting a Drillbit. All
+Drillbits in the cluster process the query and the Drillbit that received the
+query returns the query results.
+
+![ODBC to Quorum]({{ site.baseurl }}/docs/img/ODBC_to_Quorum.png)
+
+In a production environment, you should connect to a ZooKeeper Quorum for a
+more reliable connection. If one Drillbit is not available, another Drillbit
+that is 

[12/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/arch/001-core-mod.md
--
diff --git a/_docs/arch/001-core-mod.md b/_docs/arch/001-core-mod.md
new file mode 100644
index 000..17fa18d
--- /dev/null
+++ b/_docs/arch/001-core-mod.md
@@ -0,0 +1,29 @@
+---
+title: "Core Modules within a Drillbit"
+parent: "Architectural Overview"
+---
+The following image represents components within each Drillbit:
+
+![drill query flow]({{ site.baseurl }}/docs/img/DrillbitModules.png)
+
+The following list describes the key components of a Drillbit:
+
+  * **RPC end point**: Drill exposes a low overhead protobuf-based RPC 
protocol to communicate with the clients. Additionally, C++ and Java API 
layers are also available for client applications to interact with Drill. 
Clients can communicate to a specific Drillbit directly or go through a 
ZooKeeper quorum to discover the available Drillbits before submitting queries. 
It is recommended that the clients always go through ZooKeeper to shield 
clients from the intricacies of cluster management, such as the addition or 
removal of nodes. 
+
+  * **SQL parser**: Drill uses Optiq, the open source framework, to parse 
incoming queries. The output of the parser component is a language agnostic, 
computer-friendly logical plan that represents the query. 
+  * **Storage plugin interfaces**: Drill serves as a query layer on top of 
several data sources. Storage plugins in Drill represent the abstractions that 
Drill uses to interact with the data sources. Storage plugins provide Drill 
with the following information:
+* Metadata available in the source
+* Interfaces for Drill to read from and write to data sources
+* Location of data and a set of optimization rules to help with efficient 
and faster execution of Drill queries on a specific data source 
+
+In the context of Hadoop, Drill provides storage plugins for files and
+HBase/M7. Drill also integrates with Hive as a storage plugin since Hive
+provides a metadata abstraction layer on top of files, HBase/M7, and provides
+libraries to read data and operate on these sources (Serdes and UDFs).
+
+When users query files and HBase/M7 with Drill, they can do it directly or go
+through Hive if they have metadata defined there. Drill integration with Hive
+is only for metadata. Drill does not invoke the Hive execution engine for any
+requests.
+
+  * **Distributed cache**: Drill uses a distributed cache to manage metadata 
(not the data) and configuration information across various nodes. Sample 
metadata information that is stored in the cache includes query plan fragments, 
intermediate state of the query execution, and statistics. Drill uses 
Infinispan as its cache technology.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/arch/002-arch-hilite.md
--
diff --git a/_docs/arch/002-arch-hilite.md b/_docs/arch/002-arch-hilite.md
new file mode 100644
index 000..5ac51bc
--- /dev/null
+++ b/_docs/arch/002-arch-hilite.md
@@ -0,0 +1,10 @@
+---
+title: "Architectural Highlights"
+parent: "Architectural Overview"
+---
+The goal for Drill is to bring the **SQL Ecosystem** and **Performance** of
+the relational systems to **Hadoop scale** data **WITHOUT** compromising on
+the **Flexibility** of Hadoop/NoSQL systems. There are several core
+architectural elements in Apache Drill that make it a highly flexible and
+efficient query engine.
+

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/arch/arch-hilite/001-flexibility.md
--
diff --git a/_docs/arch/arch-hilite/001-flexibility.md 
b/_docs/arch/arch-hilite/001-flexibility.md
new file mode 100644
index 000..0b5c5e3
--- /dev/null
+++ b/_docs/arch/arch-hilite/001-flexibility.md
@@ -0,0 +1,78 @@
+---
+title: "Flexibility"
+parent: "Architectural Highlights"
+---
+The following features contribute to Drill's flexible architecture:
+
+**_Dynamic schema discovery_**
+
+Drill does not require schema or type specification for the data in order to
+start the query execution process. Instead, Drill starts processing the data
+in units called record-batches and discovers the schema on the fly during
+processing. Self-describing data formats such as Parquet, JSON, AVRO, and
+NoSQL databases have schema specified as part of the data itself, which Drill
+leverages dynamically at query time. Schema can change over the course of a
+Drill query, so all of the Drill operators are designed to reconfigure
+themselves when such schema changing events occur.
+
+**_Flexible data model_**
+
+Drill is purpose-built from the ground up for complex/multi-structured data
+commonly seen in Hadoop/NoSQL applications such as social/mobile, clickstream,
+logs, and sensor equipped IOT. From a user point of view, Drill a

[06/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/sql-ref/nested/001-flatten.md
--
diff --git a/_docs/drill-docs/sql-ref/nested/001-flatten.md 
b/_docs/drill-docs/sql-ref/nested/001-flatten.md
deleted file mode 100644
index 124db91..000
--- a/_docs/drill-docs/sql-ref/nested/001-flatten.md
+++ /dev/null
@@ -1,89 +0,0 @@

-title: "FLATTEN Function"
-parent: "Nested Data Functions"

-The FLATTEN function is useful for flexible exploration of repeated data.
-FLATTEN separates the elements in a repeated field into individual records. To
-maintain the association between each flattened value and the other fields in
-the record, all of the other columns are copied into each new record. A very
-simple example would turn this data (one record):
-
-{
-  "x" : 5,
-  "y" : "a string",
-  "z" : [ 1,2,3]
-}
-
-into three distinct records:
-
-select flatten(z) from table;
-| x   | y  | z |
-+-++---+
-| 5   | "a string" | 1 |
-| 5   | "a string" | 2 |
-| 5   | "a string" | 3 |
-
-The function takes a single argument, which must be an array (the `z` column
-in this example).
-
-  
-
-For a more interesting example, consider the JSON data in the publicly
-available [Yelp](https://www.yelp.com/dataset_challenge/dataset) data set. The
-first query below returns three columns from the
-`yelp_academic_dataset_business.json` file: `name`, `hours`, and `categories`.
-The query is restricted to distinct rows where the name is `zpizza`. The
-query returns only one row that meets those criteria; however, note that this
-row contains an array of four categories:
-
-0: jdbc:drill:zk=local> select distinct name, hours, categories 
-from dfs.yelp.`yelp_academic_dataset_business.json` 
-where name ='zpizza';
-+--------+--------------------+------------------------------------------------+
-|  name  |       hours        |                   categories                   |
-+--------+--------------------+------------------------------------------------+
-| zpizza | {"Tuesday":{"close":"22:00","open":"10:00"},"Friday":{"close":"23:00","open":"10:00"},"Monday":{"close":"22:00","open":"10:00"},"Wednesday":{"close":"22:00","open":"10:00"},"Thursday":{"close":"22:00","open":"10:00"},"Sunday":{"close":"22:00","open":"10:00"},"Saturday":{"close":"23:00","open":"10:00"}} | ["Gluten-Free","Pizza","Vegan","Restaurants"] |
-
-The FLATTEN function can operate on this single row and return multiple rows,
-one for each category:
-
-0: jdbc:drill:zk=local> select distinct name, flatten(categories) as categories
-from dfs.yelp.`yelp_academic_dataset_business.json`
-where name ='zpizza' order by 2;
-+--------+-------------+
-|  name  | categories  |
-+--------+-------------+
-| zpizza | Gluten-Free |
-| zpizza | Pizza       |
-| zpizza | Restaurants |
-| zpizza | Vegan       |
-+--------+-------------+
-4 rows selected (2.797 seconds)
-
-Having used the FLATTEN function to break down arrays into distinct rows, you
-can run queries that do deeper analysis on the flattened result set. For
-example, you can use FLATTEN in a subquery, then apply WHERE clause
-constraints or aggregate functions to the results in the outer query.
-
-The following query uses the same data file as the previous query to flatten
-the categories array, then run a COUNT function on the flattened result:
-
-select celltbl.catl, count(celltbl.catl) catcount 
-from (select flatten(categories) catl 
-from dfs.yelp.`yelp_academic_dataset_business.json`) celltbl 
-group by celltbl.catl 
-order by count(celltbl.catl) desc limit 5;
- 
-+---------------+------------+
-|     catl      |  catcount  |
-+---------------+------------+
-| Restaurants   | 14303      |
-| Shopping      | 6428       |
-| Food          | 5209       |
-| Beauty & Spas | 3421       |
-| Nightlife     | 2870       |
-+---------------+------------+
-
-A common use case for FLATTEN is its use in conjunction with the
-[KVGEN](/confluence/display/DRILL/KVGEN+Function) function.
-

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/sql-ref/nested/002-kvgen.md
--
diff --git a/_docs/drill-docs/sql-ref/nested/002-kvgen.md 
b/_docs/drill-docs/sql-ref/nested/002-kvgen.md
deleted file mode 100644
index a27a781..000
--- a/_docs/drill-docs/sql-ref/nested/002-kvgen.md
+++ /dev/null
@@ -1,150 +0,0 @@

-title: "KVGEN Function"
-parent: "Nested Data Functions"

-KVGEN stands for _key-value generation_. This function is useful when complex
-data files contain arbitrary maps that consist of relatively "unknown" column
-names. Instead of having to specify columns in the map to access the data, you
-can use KVGEN to ret

[10/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/016-bylaws.md
--
diff --git a/_docs/drill-docs/016-bylaws.md b/_docs/drill-docs/016-bylaws.md
deleted file mode 100644
index 6f2604f..000
--- a/_docs/drill-docs/016-bylaws.md
+++ /dev/null
@@ -1,171 +0,0 @@

-title: "Project Bylaws"
-parent: "Apache Drill Documentation"

-# Introduction
-
-This document defines the bylaws under which the Apache Drill project
-operates. It defines the roles and responsibilities of the project, who may
-vote, how voting works, how conflicts are resolved, etc.
-
-Drill is a project of the [Apache Software
-Foundation](http://www.apache.org/foundation/). The foundation holds the
-copyright on Apache code including the code in the Drill codebase. The
-[foundation FAQ](http://www.apache.org/foundation/faq.html) explains the
-operation and background of the foundation.
-
-Drill is typical of Apache projects in that it operates under a set of
-principles, known collectively as the _Apache Way_. If you are new to Apache
-development, please refer to the [Incubator
-project](http://incubator.apache.org/) for more information on how Apache
-projects operate.
-
-# Roles and Responsibilities
-
-Apache projects define a set of roles with associated rights and
-responsibilities. These roles govern what tasks an individual may perform
-within the project. The roles are defined in the following sections.
-
-## Users
-
-The most important participants in the project are people who use our
-software. The majority of our contributors start out as users and guide their
-development efforts from the user's perspective.
-
-Users contribute to the Apache projects by providing feedback to contributors
-in the form of bug reports and feature suggestions. As well, users participate
-in the Apache community by helping other users on mailing lists and user
-support forums.
-
-## Contributors
-
-All of the volunteers who are contributing time, code, documentation, or
-resources to the Drill Project. A contributor that makes sustained, welcome
-contributions to the project may be invited to become a committer, though the
-exact timing of such invitations depends on many factors.
-
-## Committers
-
-The project's committers are responsible for the project's technical
-management. Committers have access to a specified set of subproject's code
-repositories. Committers on subprojects may cast binding votes on any
-technical discussion regarding that subproject.
-
-Committer access is by invitation only and must be approved by lazy consensus
-of the active PMC members. A Committer is considered _emeritus_ by his or her
-own declaration or by not contributing in any form to the project for over six
-months. An emeritus committer may request reinstatement of commit access from
-the PMC which will be sufficient to restore him or her to active committer
-status.
-
-Commit access can be revoked by a unanimous vote of all the active PMC members
-(except the committer in question if he or she is also a PMC member).
-
-All Apache committers are required to have a signed [Contributor License
-Agreement (CLA)](http://www.apache.org/licenses/icla.txt) on file with the
-Apache Software Foundation. There is a [Committer
-FAQ](http://www.apache.org/dev/committers.html) which provides more details on
-the requirements for committers.
-
-A committer who makes a sustained contribution to the project may be invited
-to become a member of the PMC. The form of contribution is not limited to
-code. It can also include code review, helping out users on the mailing lists,
-documentation, etc.
-
-## Project Management Committee
-
-The PMC is responsible to the board and the ASF for the management and
-oversight of the Apache Drill codebase. The responsibilities of the PMC
-include:
-
-  * Deciding what is distributed as products of the Apache Drill project. In 
particular all releases must be approved by the PMC.
-  * Maintaining the project's shared resources, including the codebase 
repository, mailing lists, websites.
-  * Speaking on behalf of the project.
-  * Resolving license disputes regarding products of the project.
-  * Nominating new PMC members and committers.
-  * Maintaining these bylaws and other guidelines of the project.
-
-Membership of the PMC is by invitation only and must be approved by a lazy
-consensus of active PMC members. A PMC member is considered _emeritus_ by his
-or her own declaration or by not contributing in any form to the project for
-over six months. An emeritus member may request reinstatement to the PMC,
-which will be sufficient to restore him or her to active PMC member status.
-
-Membership of the PMC can be revoked by a unanimous vote of all the active
-PMC members other than the member in question.
-
-The chair of the PMC is appointed by the ASF board. The chair is an office
-holder of the Apache Software Foundation (Vice President, Apache D

[02/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/sql-ref/cmd-summary/003-select.md
--
diff --git a/_docs/sql-ref/cmd-summary/003-select.md 
b/_docs/sql-ref/cmd-summary/003-select.md
new file mode 100644
index 000..4a4
--- /dev/null
+++ b/_docs/sql-ref/cmd-summary/003-select.md
@@ -0,0 +1,85 @@
+---
+title: "SELECT Statements"
+parent: "SQL Commands Summary"
+---
+Drill supports the following ANSI standard clauses in the SELECT statement:
+
+  * WITH clause
+  * SELECT list
+  * FROM clause
+  * WHERE clause
+  * GROUP BY clause
+  * HAVING clause
+  * ORDER BY clause (with an optional LIMIT clause)
+
+You can use the same SELECT syntax in the following commands:
+
+  * CREATE TABLE AS (CTAS)
+  * CREATE VIEW
+
+INSERT INTO SELECT is not yet supported.
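+
+As a sketch only (the table and column names are placeholders), the clauses
+listed above combine in a single statement:
+
+WITH t AS (SELECT col1, col2 FROM tbl)
+SELECT col1, SUM(col2) AS total
+FROM t
+WHERE col2 > 0
+GROUP BY col1
+HAVING SUM(col2) > 10
+ORDER BY total DESC
+LIMIT 5;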
+
+## Column Aliases
+
+You can use named column aliases in the SELECT list to provide meaningful
+names for regular columns and computed columns, such as the results of
+aggregate functions. See the section on running queries for examples.
+
+You cannot reference column aliases in the following clauses:
+
+  * WHERE
+  * GROUP BY
+  * HAVING
+
+Because Drill works with schema-less data sources, you cannot use positional
+aliases (1, 2, etc.) to refer to SELECT list columns, except in the ORDER BY
+clause.
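+
+For example (placeholder names), an alias or a positional reference is legal
+in the ORDER BY clause, but a filter must repeat the expression:
+
+SELECT state, COUNT(*) AS cnt
+FROM t1
+GROUP BY state
+HAVING COUNT(*) > 10   -- not: HAVING cnt > 10
+ORDER BY cnt DESC;     -- or: ORDER BY 2 DESC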
+
+## UNION ALL Set Operator
+
+Drill supports the UNION ALL set operator to combine two result sets. The
+distinct UNION operator is not yet supported.
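+
+A minimal sketch (placeholder tables); the two sides must be union-compatible:
+
+SELECT col1 FROM t1
+UNION ALL
+SELECT col1 FROM t2;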
+
+The EXCEPT, EXCEPT ALL, INTERSECT, and INTERSECT ALL operators are not yet
+supported.
+
+## Joins
+
+Drill supports ANSI standard joins in the FROM and WHERE clauses:
+
+  * Inner joins
+  * Left, full, and right outer joins
+
+The following types of join syntax are supported:
+
+Join type| Syntax  
+---|---  
+Join condition in WHERE clause|FROM table1, table2 WHERE table1.col1=table2.col1  
+USING join in FROM clause|FROM table1 JOIN table2 USING(col1, ...)  
+ON join in FROM clause|FROM table1 JOIN table2 ON table1.col1=table2.col1  
+NATURAL JOIN in FROM clause|FROM table1 NATURAL JOIN table2  
+
+Cross-joins are not yet supported. You must specify a join condition when more
+than one table is listed in the FROM clause.
+
+Non-equijoins are supported if the join also contains an equality condition on
+the same two tables as part of a conjunction:
+
+table1.col1 = table2.col1 AND table1.c2 < table2.c2
+
+This restriction applies to both inner and outer joins.
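+
+For example, this query (placeholder names) is accepted because the range
+predicate is ANDed with an equality condition on the same two tables:
+
+SELECT *
+FROM table1 t1 JOIN table2 t2
+ON t1.col1 = t2.col1 AND t1.c2 < t2.c2;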
+
+## Subqueries
+
+You can use the following subquery operators in Drill queries. These operators
+all return Boolean results.
+
+  * ALL
+  * ANY
+  * EXISTS
+  * IN
+  * SOME
+
+In general, correlated subqueries are supported. EXISTS and NOT EXISTS
+subqueries that do not contain a correlation join are not yet supported.
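+
+A short example of a supported subquery (placeholder names):
+
+SELECT col1 FROM t1
+WHERE col1 IN (SELECT col1 FROM t2);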
+

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/sql-ref/cmd-summary/004-show-files.md
--
diff --git a/_docs/sql-ref/cmd-summary/004-show-files.md 
b/_docs/sql-ref/cmd-summary/004-show-files.md
new file mode 100644
index 000..1fcf395
--- /dev/null
+++ b/_docs/sql-ref/cmd-summary/004-show-files.md
@@ -0,0 +1,65 @@
+---
+title: "SHOW FILES Command"
+parent: "SQL Commands Summary"
+---
+The SHOW FILES command provides a quick report of the file systems that are
+visible to Drill for query purposes. This command is unique to Apache Drill.
+
+## Syntax
+
+The SHOW FILES command supports the following syntax.
+
+SHOW FILES [ FROM filesystem.directory_name | IN filesystem.directory_name ];
+
+The FROM or IN clause is required if you do not specify a default file system
+first. You can do this with the USE command. FROM and IN are synonyms.
+
+The directory name is optional. (If the directory name is a Drill reserved
+word, you must use back ticks around the name.)
+
+The command returns standard Linux `stat` information for each file or
+directory, such as permissions, owner, and group values. This information is
+not specific to Drill.
+
+## Examples
+
+The following example returns information about directories and files in the
+local (`dfs`) file system.
+
+   0: jdbc:drill:> use dfs;
+
+   +------------+----------------------------------+
+   |     ok     |  summary                         |
+   +------------+----------------------------------+
+   | true       | Default schema changed to 'dfs'  |
+   +------------+----------------------------------+
+   1 row selected (0.318 seconds)
+
+   0: jdbc:drill:> show files;
+   +------------+-------------+------------+------------+------------+------------+-------------+------------+------------------+
+   |    name    | isDirectory |   isFile   |   length   |   owner    |   group    | permissions | accessTime | modificationTime |
+   +------------+-------------+------------+------------+------------+------------+-------------+------------+------------------+
+   | user       | true        | false      | 1

[05/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/img/ngram_plugin2.png
--
diff --git a/_docs/img/ngram_plugin2.png b/_docs/img/ngram_plugin2.png
new file mode 100644
index 000..60d432d
Binary files /dev/null and b/_docs/img/ngram_plugin2.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/img/settings.png
--
diff --git a/_docs/img/settings.png b/_docs/img/settings.png
new file mode 100644
index 000..dcff0d9
Binary files /dev/null and b/_docs/img/settings.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/img/student_hive.png
--
diff --git a/_docs/img/student_hive.png b/_docs/img/student_hive.png
new file mode 100644
index 000..7e22b88
Binary files /dev/null and b/_docs/img/student_hive.png differ

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/install/001-drill-in-10.md
--
diff --git a/_docs/install/001-drill-in-10.md b/_docs/install/001-drill-in-10.md
new file mode 100644
index 000..13d2410
--- /dev/null
+++ b/_docs/install/001-drill-in-10.md
@@ -0,0 +1,365 @@
+---
+title: "Apache Drill in 10 Minutes"
+parent: "Install Drill"
+---
+* Objective
+* A Few Bits About Apache Drill
+* Process Overview
+* Install Drill
+  * Installing Drill on Linux
+  * Installing Drill on Mac OS X
+  * Installing Drill on Windows 
+* Start Drill 
+* Query Sample Data 
+* Summary 
+* Next Steps
+* More Information
+
+## Objective
+
+Use Apache Drill to query sample data in 10 minutes. For simplicity, you’ll
+run Drill in _embedded_ mode rather than _distributed_ mode to try out Drill
+without having to perform any setup tasks.
+
+## A Few Bits About Apache Drill
+
+Drill is a clustered, powerful MPP (Massively Parallel Processing) query
+engine for Hadoop that can process petabytes of data, fast. Drill is useful
+for short, interactive ad-hoc queries on large-scale data sets. Drill is
+capable of querying nested data in formats like JSON and Parquet and
+performing dynamic schema discovery. Drill does not require a centralized
+metadata repository.
+
+### **_Dynamic schema discovery_**
+
+Drill does not require schema or type specification for data in order to start
+the query execution process. Drill starts data processing in record-batches
+and discovers the schema during processing. Self-describing data formats such
+as Parquet, JSON, AVRO, and NoSQL databases have schema specified as part of
+the data itself, which Drill leverages dynamically at query time. Because
+schema can change over the course of a Drill query, all Drill operators are
+designed to reconfigure themselves when schemas change.
+
+### **_Flexible data model_**
+
+Drill allows access to nested data attributes, just like SQL columns, and
+provides intuitive extensions to easily operate on them. From an architectural
+point of view, Drill provides a flexible hierarchical columnar data model that
+can represent complex, highly dynamic and evolving data models. Drill allows
+for efficient processing of these models without the need to flatten or
+materialize them at design time or at execution time. Relational data in Drill
+is treated as a special or simplified case of complex/multi-structured data.
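+
+For example (hypothetical file and field names), a nested attribute is
+addressed with simple dotted notation:
+
+SELECT t.address.zip
+FROM dfs.`/tmp/customers.json` t;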
+
+### **_De-centralized metadata_**
+
+Drill does not have a centralized metadata requirement. You do not need to
+create and manage tables and views in a metadata repository, or rely on a
+database administrator group for such a function. Drill metadata is derived
+from the storage plugins that correspond to data sources. Storage plugins
+provide a spectrum of metadata ranging from full metadata (Hive), partial
+metadata (HBase), or no central metadata (files). De-centralized metadata
+means that Drill is NOT tied to a single Hive repository. You can query
+multiple Hive repositories at once and then combine the data with information
+from HBase tables or with a file in a distributed file system. You can also
+use SQL DDL syntax to create metadata within Drill, which gets organized just
+like a traditional database. Drill metadata is accessible through the ANSI
+standard INFORMATION_SCHEMA database.
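+
+As an illustrative sketch only (the plugin and table names are invented), one
+query can combine a Hive table with a raw JSON file:
+
+SELECT h.cust_id, j.zip
+FROM hive.orders h
+JOIN dfs.`/tmp/customers.json` j ON h.cust_id = j.cust_id;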
+
+### **_Extensibility_**
+
+Drill provides an extensible architecture at all layers, including the storage
+plugin, query, query optimization/execution, and client API layers. You can
+customize any layer for the specific needs of an organization or you can
+extend the layer to a broader array of use cases. Drill provides a built in
+classpath scanning and plugin concept to add additional storage plugins,
+functions, and operators with minimal configuration.
+
+## Process Overview
+
+Download the Apache Drill archive and extract the contents to a directory on
+your machine. The Apache Drill archiv

[11/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/design/005-value.md
--
diff --git a/_docs/design/005-value.md b/_docs/design/005-value.md
new file mode 100644
index 000..828376a
--- /dev/null
+++ b/_docs/design/005-value.md
@@ -0,0 +1,163 @@
+---
+title: "Value Vectors"
+parent: "Design Docs"
+---
+This document defines the data structures required for passing sequences of
+columnar data between 
[Operators](https://docs.google.com/a/maprtech.com/document/d/1zaxkcrK9mYyfpGwX1kAV80z0PCi8abefL45zOzb97dI/edit#bookmark=id.iip15ful18mm).
+
+## Goals
+
+### Support Operators Written in Multiple Languages
+
+ValueVectors should support operators written in C/C++/Assembly. To support
+this, the underlying ByteBuffer will not require modification when passed
+through the JNI interface. The ValueVector will be considered immutable once
+constructed. Endianness has not yet been considered.
+
+### Access
+
+Reading a random element from a ValueVector must be a constant time operation.
+To accommodate this, elements are identified by their offset from the start of
+the buffer. Repeated, nullable, and variable width ValueVectors utilize an
+additional fixed width value vector to index each element. Write access is not
+supported once the ValueVector has been constructed by the RecordBatch.
+
+### Efficient Subsets of Value Vectors
+
+When an operator returns a subset of values from a ValueVector, it should
+reuse the original ValueVector. To accomplish this, a level of indirection is
+introduced to skip over certain values in the vector. This level of
+indirection is a sequence of offsets which reference an offset in the original
+ValueVector and the count of subsequent values which are to be included in the
+subset.
+
+### Pooled Allocation
+
+ValueVectors utilize one or more buffers under the covers. These buffers will
+be drawn from a pool. Value vectors are themselves created and destroyed as a
+schema changes during the course of record iteration.
+
+### Homogenous Value Types
+
+Each value in a Value Vector is of the same type. The [Record 
Batch](https://docs.google.com/a/maprtech.com/document/d/1zaxkcrK9mYyfpGwX1kAV80z0PCi8abefL45zOzb97dI/edit#bookmark=kix.s2xuoqnr8obe)
 implementation is responsible for
+creating a new Value Vector any time there is a change in schema.
+
+## Definitions
+
+Data Types
+
+The canonical source for value type definitions is the [Drill
+Datatypes](http://bit.ly/15JO9bC) document. The individual types are listed
+under the ‘Basic Data Types’ tab, while the value vector types can be found
+under the ‘Value Vectors’ tab.
+
+Operators
+
+An operator is responsible for transforming a stream of fields. It operates on
+Record Batches or constant values.
+
+Record Batch
+
+A set of field values for some range of records. The batch may be composed of
+Value Vectors, in which case each batch consists of exactly one schema.
+
+Value Vector
+
+The value vector is comprised of one or more contiguous buffers; one which
+stores a sequence of values, and zero or more which store any metadata
+associated with the ValueVector.
+
+## Data Structure
+
+A ValueVector stores values in a ByteBuf, which is a contiguous region of
+memory. Additional levels of indirection are used to support variable value
+widths, nullable values, repeated values and selection vectors. These levels
+of indirection are primarily lookup tables which consist of one or more fixed
+width ValueVectors which may be combined (e.g. for nullable, variable width
+values). A fixed width ValueVector of non-nullable, non-repeatable values does
+not require an indirect lookup; elements can be accessed directly by
+multiplying position by stride.
+
+Fixed Width Values
+
+Fixed width ValueVectors simply contain a packed sequence of values. Random
+access is supported by accessing element n at ByteBuf[0] + Index * Stride,
+where Index is 0-based. The following illustrates the underlying buffer of
+INT4 values [1 .. 6]:
+
+![drill query flow]({{ site.baseurl }}/docs/img/value1.png)
+
+Nullable Values
+
+Nullable values are represented by a vector of bit values. Each bit in the
+vector corresponds to an element in the ValueVector. If the bit is not set,
+the value is NULL. Otherwise the value is retrieved from the underlying
+buffer. The following illustrates a NullableValueVector of INT4 values 2, 3
+and 6:
+
+![drill query flow]({{ site.baseurl }}/docs/img/value2.png)
+  
+### Repeated Values
+
+A repeated ValueVector is used for elements which can contain multiple values
+(e.g. a JSON array). A table of offset and count pairs is used to represent
+each repeated element in the ValueVector. A count of zero means the element
+has no values (note the offset field is unused in this case). The following
+illustrates three fields; one with two values, one with no values, and one
+with a single value:
+
+![drill query flow]({{ site.baseurl }}/docs/img/value3

[08/13] drill git commit: DRILL-2315: Confluence conversion plus fixes

2015-02-25 Thread bridgetb
http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/manage/004-partition-prune.md
--
diff --git a/_docs/drill-docs/manage/004-partition-prune.md 
b/_docs/drill-docs/manage/004-partition-prune.md
deleted file mode 100644
index fa81034..000
--- a/_docs/drill-docs/manage/004-partition-prune.md
+++ /dev/null
@@ -1,75 +0,0 @@

-title: "Partition Pruning"
-parent: "Manage Drill"

-Partition pruning is a performance optimization that limits the number of
-files and partitions that Drill reads when querying file systems and Hive
-tables. Drill only reads a subset of the files that reside in a file system or
-a subset of the partitions in a Hive table when a query matches certain filter
-criteria.
-
-For Drill to apply partition pruning to Hive tables, you must have created the
-tables in Hive using the `PARTITIONED BY` clause:
-
-`CREATE TABLE <table_name> (<column list>) PARTITIONED BY (<column>);`
-
-When you create Hive tables using the `PARTITIONED BY` clause, each partition of
-data is automatically split out into different directories as data is written
-to disk. For more information about Hive partitioning, refer to the [Apache
-Hive 
wiki](https://cwiki.apache.org/confluence/display/Hive/LanguageManual+DDL/#LanguageManualDDL-PartitionedTables).
-
-Typically, table data in a file system is organized by directories and
-subdirectories. Queries on table data may contain `WHERE` clause filters on
-specific directories.
-
-Drill’s query planner evaluates the filters as part of a Filter operator. If
-no partition filters are present, the underlying Scan operator reads all files
-in all directories and then sends the data to operators downstream, such as
-Filter.
-
-When partition filters are present, the query planner determines if it can
-push the filters down to the Scan such that the Scan only reads the
-directories that match the partition filters, thus reducing disk I/O.
-
-## Partition Pruning Example
-
-The /`Users/max/data/logs` directory in a file system contains subdirectories
-that span a few years.
-
-The following image shows the hierarchical structure of the `…/logs` 
directory
-and (sub) directories:
-
-![](../../img/54.png)
-
-The following query requests log file data for 2013 from the `…/logs`
-directory in the file system:
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 and dir0 = 2013 limit 2;
-
-If you run the `EXPLAIN PLAN` command for the query, you can see that the
-`…/logs` directory is filtered by the scan operator.
-
-EXPLAIN PLAN FOR SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 and dir0 = 2013 limit 2;
-
-The following image shows a portion of the physical plan when partition
-pruning is applied:
-
-![](../../img/21.png)
-
-## Filter Examples
-
-The following queries include examples of the types of filters eligible for
-partition pruning optimization:
-
-**Example 1: Partition filters ANDed together**
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE dir0 = '2014' AND dir1 = '1'
-
-**Example 2: Partition filter ANDed with regular column filter**
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE cust_id < 10 AND dir0 = 2013 limit 2;
-
-**Example 3: Combination of AND, OR involving partition filters**
-
-SELECT * FROM dfs.`/Users/max/data/logs` WHERE (dir0 = '2013' AND dir1 = '1') OR (dir0 = '2014' AND dir1 = '2')
-

http://git-wip-us.apache.org/repos/asf/drill/blob/d959a210/_docs/drill-docs/manage/005-monitor-cancel.md
--
diff --git a/_docs/drill-docs/manage/005-monitor-cancel.md 
b/_docs/drill-docs/manage/005-monitor-cancel.md
deleted file mode 100644
index 6888eea..000
--- a/_docs/drill-docs/manage/005-monitor-cancel.md
+++ /dev/null
@@ -1,30 +0,0 @@

-title: "Monitoring and Canceling Queries in the Drill Web UI"
-parent: "Manage Drill"

-You can monitor and cancel queries from the Drill Web UI. To access the Drill
-Web UI, the Drillbit process must be running on the Drill node that you use to
-access the Drill Web UI.
-
-To monitor or cancel a query from the Drill Web UI, complete the following
-steps:
-
-  1. Navigate to the Drill Web UI at `<node IP address>:8047`.  
-When you access the Drill Web UI, you see some general information about Drill
-running in your cluster, such as the nodes running the Drillbit process, the
-various ports Drill is using, and the amount of direct memory assigned to
-Drill.  
-![](../../img/7.png)
-
-  2. Select **Profiles** in the toolbar. A list of running and completed 
queries appears. Drill assigns a query ID to each query and lists the Foreman 
node. The Foreman is the Drillbit node that receives the query from the client 
or application. The Foreman drives the entire query.  
-![](../../img/51.png)
-
-  3. Click the **Query ID** for the query that you want to monitor or cancel. 
The Query and Planning window appears.  
-![](../../img/4.png)
-
-  4. Selec

Git Push Summary

2015-02-25 Thread adi
Repository: drill
Updated Branches:
  refs/heads/gh-pages-master [created] 23f82db9f


[1/2] drill git commit: DRILL-2130: Fixed JUnit/Hamcrest/Mockito/Paranamer class path problem.

2015-02-25 Thread adi
Repository: drill
Updated Branches:
  refs/heads/master 8bb6b08e5 -> f7ef5ec78


DRILL-2130: Fixed JUnit/Hamcrest/Mockito/Paranamer class path problem.


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/b0faf708
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/b0faf708
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/b0faf708

Branch: refs/heads/master
Commit: b0faf708bdbeb53bc3a446d3782554640bdfd6df
Parents: 8bb6b08
Author: dbarclay 
Authored: Sun Feb 22 00:45:42 2015 -0800
Committer: Aditya Kishore 
Committed: Wed Feb 25 11:08:20 2015 -0800

--
 ...rill2130CommonHamcrestConfigurationTest.java | 46 
 ...30StorageHBaseHamcrestConfigurationTest.java | 46 
 ...torageHiveCoreHamcrestConfigurationTest.java | 46 
 ...130InterpreterHamcrestConfigurationTest.java | 46 
 exec/java-exec/pom.xml  |  9 
 ...ll2130JavaExecHamcrestConfigurationTest.java | 46 
 ...ll2130JavaJdbcHamcrestConfigurationTest.java | 46 
 pom.xml |  8 
 8 files changed, 293 insertions(+)
--


http://git-wip-us.apache.org/repos/asf/drill/blob/b0faf708/common/src/test/java/org/apache/drill/test/Drill2130CommonHamcrestConfigurationTest.java
--
diff --git 
a/common/src/test/java/org/apache/drill/test/Drill2130CommonHamcrestConfigurationTest.java
 
b/common/src/test/java/org/apache/drill/test/Drill2130CommonHamcrestConfigurationTest.java
new file mode 100644
index 000..99643b1
--- /dev/null
+++ 
b/common/src/test/java/org/apache/drill/test/Drill2130CommonHamcrestConfigurationTest.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.drill.test;
+
+import org.junit.Test;
+import static org.junit.Assert.assertThat;
+import static org.junit.Assert.fail;
+import static org.hamcrest.CoreMatchers.equalTo;
+
+
+public class Drill2130CommonHamcrestConfigurationTest {
+
+  @SuppressWarnings("unused")
+  private org.hamcrest.MatcherAssert forCompileTimeCheckForNewEnoughHamcrest;
+
+  @Test
+  public void testJUnitHamcrestMatcherFailureWorks() {
+try {
+  assertThat( 1, equalTo( 2 ) );
+}
+catch ( NoSuchMethodError e ) {
+  fail( "Class search path seems broken re new JUnit and old Hamcrest."
+ + "  Got NoSuchMethodError;  e: " + e );
+}
+catch ( AssertionError e ) {
+  System.out.println( "Class path seems fine re new JUnit vs. old 
Hamcrest."
+  + " (Got AssertionError, not NoSuchMethodError.)" );
+}
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/drill/blob/b0faf708/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/test/Drill2130StorageHBaseHamcrestConfigurationTest.java
--
diff --git 
a/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/test/Drill2130StorageHBaseHamcrestConfigurationTest.java
 
b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/test/Drill2130StorageHBaseHamcrestConfigurationTest.java
new file mode 100644
index 000..b52654d
--- /dev/null
+++ 
b/contrib/storage-hbase/src/test/java/org/apache/drill/hbase/test/Drill2130StorageHBaseHamcrestConfigurationTest.java
@@ -0,0 +1,46 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHO

[2/2] drill git commit: DRILL-1690: Issue with using HBase plugin to access row_key only

2015-02-25 Thread adi
DRILL-1690: Issue with using HBase plugin to access row_key only


Project: http://git-wip-us.apache.org/repos/asf/drill/repo
Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/f7ef5ec7
Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/f7ef5ec7
Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/f7ef5ec7

Branch: refs/heads/master
Commit: f7ef5ec784844a99b8b39fe10ab14f001ae149f2
Parents: b0faf70
Author: Aditya Kishore 
Authored: Wed Feb 25 01:10:48 2015 -0800
Committer: Aditya Kishore 
Committed: Wed Feb 25 11:17:06 2015 -0800

--
 .../exec/store/hbase/HBaseRecordReader.java | 35 +++-
 1 file changed, 19 insertions(+), 16 deletions(-)
--


http://git-wip-us.apache.org/repos/asf/drill/blob/f7ef5ec7/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRecordReader.java
--
diff --git 
a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRecordReader.java
 
b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRecordReader.java
index da38707..42038e8 100644
--- 
a/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRecordReader.java
+++ 
b/contrib/storage-hbase/src/main/java/org/apache/drill/exec/store/hbase/HBaseRecordReader.java
@@ -72,6 +72,8 @@ public class HBaseRecordReader extends AbstractRecordReader 
implements DrillHBas
   private Configuration hbaseConf;
   private OperatorContext operatorContext;
 
+  private boolean rowKeyOnly;
+
+  public HBaseRecordReader(Configuration conf, HBaseSubScan.HBaseSubScanSpec subScanSpec,
+  List<SchemaPath> projectedColumns, FragmentContext context) throws OutOfMemoryException {
 hbaseConf = conf;
@@ -87,8 +89,8 @@ public class HBaseRecordReader extends AbstractRecordReader 
implements DrillHBas
   @Override
+  protected Collection<SchemaPath> transformColumns(Collection<SchemaPath> columns) {
+Set<SchemaPath> transformed = Sets.newLinkedHashSet();
+rowKeyOnly = true;
 if (!isStarQuery()) {
-  boolean rowKeyOnly = true;
   for (SchemaPath column : columns) {
 if (column.getRootSegment().getPath().equalsIgnoreCase(ROW_KEY)) {
   transformed.add(ROW_KEY_PATH);
@@ -116,6 +118,7 @@ public class HBaseRecordReader extends AbstractRecordReader 
implements DrillHBas
 HBaseUtils.andFilterAtIndex(hbaseScan.getFilter(), 
HBaseUtils.LAST_FILTER, new FirstKeyOnlyFilter()));
   }
 } else {
+  rowKeyOnly = false;
   transformed.add(ROW_KEY_PATH);
 }
 
@@ -131,7 +134,6 @@ public class HBaseRecordReader extends AbstractRecordReader 
implements DrillHBas
 this.operatorContext = operatorContext;
   }
 
-
   @Override
   public void setup(OutputMutator output) throws ExecutionSetupException {
 this.outputMutator = output;
@@ -197,22 +199,23 @@ public class HBaseRecordReader extends 
AbstractRecordReader implements DrillHBas
   if (rowKeyVector != null) {
 rowKeyVector.getMutator().setSafe(rowCount, cells[0].getRowArray(), 
cells[0].getRowOffset(), cells[0].getRowLength());
   }
+  if (!rowKeyOnly) {
+for (Cell cell : cells) {
+  int familyOffset = cell.getFamilyOffset();
+  int familyLength = cell.getFamilyLength();
+  byte[] familyArray = cell.getFamilyArray();
+  MapVector mv = getOrCreateFamilyVector(new String(familyArray, 
familyOffset, familyLength), true);
 
-  for (Cell cell : cells) {
-int familyOffset = cell.getFamilyOffset();
-int familyLength = cell.getFamilyLength();
-byte[] familyArray = cell.getFamilyArray();
-MapVector mv = getOrCreateFamilyVector(new String(familyArray, 
familyOffset, familyLength), true);
+  int qualifierOffset = cell.getQualifierOffset();
+  int qualifierLength = cell.getQualifierLength();
+  byte[] qualifierArray = cell.getQualifierArray();
+  NullableVarBinaryVector v = getOrCreateColumnVector(mv, new 
String(qualifierArray, qualifierOffset, qualifierLength));
 
-int qualifierOffset = cell.getQualifierOffset();
-int qualifierLength = cell.getQualifierLength();
-byte[] qualifierArray = cell.getQualifierArray();
-NullableVarBinaryVector v = getOrCreateColumnVector(mv, new 
String(qualifierArray, qualifierOffset, qualifierLength));
-
-int valueOffset = cell.getValueOffset();
-int valueLength = cell.getValueLength();
-byte[] valueArray = cell.getValueArray();
-v.getMutator().setSafe(rowCount, valueArray, valueOffset, valueLength);
+  int valueOffset = cell.getValueOffset();
+  int valueLength = cell.getValueLength();
+  byte[] valueArray = cell.getValueArray();
+  v.getMutator().setSafe(rowCount, valueArray, valueOffset, 
valueLength);
+}
   }
 }