[1/2] incubator-kylin git commit: KYLIN-742 Route unsupported queries to Hive

liyang Tue, 29 Sep 2015 02:48:08 -0700

Repository: incubator-kylin
Updated Branches:
  refs/heads/1.x-staging 67322634a -> 143aa608b



KYLIN-742 Route unsupported queries to Hive

Signed-off-by: Li, Yang <yang...@ebay.com>


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/79904380
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/79904380
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/79904380

Branch: refs/heads/1.x-staging
Commit: 7990438023e898f1f01a62185f5607b72ef82643
Parents: 6732263
Author: murkrishn <murkris...@ebay.com>
Authored: Mon Sep 14 23:51:39 2015 -0700
Committer: Li, Yang <yang...@ebay.com>
Committed: Tue Sep 29 15:40:26 2015 +0800

----------------------------------------------------------------------
 .../org/apache/kylin/common/KylinConfig.java    |  27 ++
 .../test_case_data/sandbox/kylin.properties     | 278 ++++++++++---------
 .../apache/kylin/rest/service/QueryService.java |  29 +-
 .../apache/kylin/rest/util/HiveRerouteUtil.java | 133 +++++++++
 4 files changed, 330 insertions(+), 137 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/common/src/main/java/org/apache/kylin/common/KylinConfig.java
----------------------------------------------------------------------
diff --git a/common/src/main/java/org/apache/kylin/common/KylinConfig.java 
b/common/src/main/java/org/apache/kylin/common/KylinConfig.java
index d3220ee..72b6a65 100644
--- a/common/src/main/java/org/apache/kylin/common/KylinConfig.java
+++ b/common/src/main/java/org/apache/kylin/common/KylinConfig.java
@@ -132,6 +132,15 @@ public class KylinConfig {
 
     public static final String HBASE_REGION_COUNT_MIN = 
"kylin.hbase.region.count.min";
     public static final String HBASE_REGION_COUNT_MAX = 
"kylin.hbase.region.count.max";
+
+    // property for Hive query rerouting enablement
+    public static final String KYLIN_ROUTE_HIVE_ENABLED = 
"kylin.route.hive.enabled";
+    public static final boolean KYLIN_ROUTE_HIVE_ENABLED_DEFAULT = false;
+    // JDBC Hive connection details for query rerouting
+    public static final String KYLIN_ROUTE_HIVE_URL = "kylin.route.hive.url";
+    public static final String KYLIN_ROUTE_HIVE_URL_DEFAULT = 
"jdbc:hive2://sandbox:10000";
+    public static final String KYLIN_ROUTE_HIVE_USERNAME = 
"kylin.route.hive.username";
+    public static final String KYLIN_ROUTE_HIVE_PASSWORD = 
"kylin.route.hive.password";
     
     // static cached instances
     private static KylinConfig ENV_INSTANCE = null;
@@ -265,6 +274,24 @@ public class KylinConfig {
 
     // 
============================================================================
 
+    // start: properties for Hive rerouting
+    public boolean isHiveReroutingEnabled() {
+       return Boolean.parseBoolean(getOptional(KYLIN_ROUTE_HIVE_ENABLED)); 
+    }
+
+    public String getHiveRerouteUrl() {
+        return getOptional(KYLIN_ROUTE_HIVE_URL, KYLIN_ROUTE_HIVE_URL_DEFAULT);
+    }
+
+    public String getHiveRerouteUsername() {
+        return getOptional(KYLIN_ROUTE_HIVE_USERNAME, "");
+    }
+
+    public String getHiveReroutePassword() {
+        return getOptional(KYLIN_ROUTE_HIVE_PASSWORD, "");
+    }
+    // end: properties for JDBC Hive rerouting
+
     public String getStorageUrl() {
         return storageUrl;
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/examples/test_case_data/sandbox/kylin.properties
----------------------------------------------------------------------
diff --git a/examples/test_case_data/sandbox/kylin.properties 
b/examples/test_case_data/sandbox/kylin.properties
index bcae574..6fe63a8 100644
--- a/examples/test_case_data/sandbox/kylin.properties
+++ b/examples/test_case_data/sandbox/kylin.properties
@@ -1,136 +1,142 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-
-## Config for Kylin Engine ##
-
-# List of web servers in use, this enables one web server instance to sync up 
with other servers.
-kylin.rest.servers=localhost:7070
-
-#set display timezone on UI,format like[GMT+N or GMT-N]
-kylin.rest.timezone=GMT-8
-
-# The metadata store in hbase
-kylin.metadata.url=kylin_metadata@hbase
-
-# The storage for final cube file in hbase
-kylin.storage.url=hbase
-
-# Temp folder in hdfs, make sure user has the right access to the hdfs 
directory
-kylin.hdfs.working.dir=/kylin
-
-kylin.job.mapreduce.default.reduce.input.mb=500
-
-# If true, job engine will not assume that hadoop CLI reside on the same 
server as it self
-# you will have to specify kylin.job.remote.cli.hostname, 
kylin.job.remote.cli.username and kylin.job.remote.cli.password
-kylin.job.run.as.remote.cmd=false
-
-# Only necessary when kylin.job.run.as.remote.cmd=true
-kylin.job.remote.cli.hostname=
-
-# Only necessary when kylin.job.run.as.remote.cmd=true
-kylin.job.remote.cli.username=
-
-# Only necessary when kylin.job.run.as.remote.cmd=true
-kylin.job.remote.cli.password=
-
-# Used by test cases to prepare synthetic data for sample cube
-kylin.job.remote.cli.working.dir=/tmp/kylin
-
-# Max count of concurrent jobs running
-kylin.job.concurrent.max.limit=10
-
-# Time interval to check hadoop job status
-kylin.job.yarn.app.rest.check.interval.seconds=10
-
-# Hive database name for putting the intermediate flat tables
-kylin.job.hive.database.for.intermediatetable=default
-
-#default compression codec for htable,snappy,lzo,gzip,lz4
-kylin.hbase.default.compression.codec=gzip
-
-# The cut size for hbase region, in GB.
-# E.g, for cube whose capacity be marked as "SMALL", split region per 10GB by 
default
-kylin.hbase.region.cut.small=10
-kylin.hbase.region.cut.medium=20
-kylin.hbase.region.cut.large=100
-
-# HBase min and max region count
-kylin.hbase.region.count.min=1
-kylin.hbase.region.count.min=500
-
-## Config for Restful APP ##
-# database connection settings:
-ldap.server=
-ldap.username=
-ldap.password=
-ldap.user.searchBase=
-ldap.user.searchPattern=
-ldap.user.groupSearchBase=
-ldap.service.searchBase=OU=
-ldap.service.searchPattern=
-ldap.service.groupSearchBase=
-acl.adminRole=
-acl.defaultRole=
-ganglia.group=
-ganglia.port=8664
-
-## Config for mail service
-
-# If true, will send email notification;
-mail.enabled=false
-mail.host=
-mail.username=
-mail.password=
-mail.sender=
-
-###########################config info for web#######################
-
-#help info ,format{name|displayName|link} ,optional
-kylin.web.help.length=4
-kylin.web.help.0=start|Getting Started|
-kylin.web.help.1=odbc|ODBC Driver|
-kylin.web.help.2=tableau|Tableau Guide|
-kylin.web.help.3=onboard|Cube Design Tutorial|
-#hadoop url link ,optional
-kylin.web.hadoop=
-#job diagnostic url link ,optional
-kylin.web.diagnostic=
-#contact mail on web page ,optional
-kylin.web.contact_mail=
-
-###########################config info for front#######################
-
-#env DEV|QA|PROD
-deploy.env=DEV
-
-###########################config info for sandbox#######################
-kylin.sandbox=true
-
-
-###########################config info for kylin monitor#######################
-# hive jdbc url
-kylin.monitor.hive.jdbc.connection.url= jdbc:hive2://sandbox:10000
-
-#config where to parse query log,split with comma ,will also read 
$KYLIN_HOME/tomcat/logs/ by default
-kylin.monitor.ext.log.base.dir = /tmp/kylin_log1,/tmp/kylin_log2
-
-#will create external hive table to query result csv file
-#will set to kylin_query_log by default if not config here
-kylin.monitor.query.log.parse.result.table = kylin_query_log
-
-
-
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+## Config for Kylin Engine ##
+
+# List of web servers in use, this enables one web server instance to sync up 
with other servers.
+kylin.rest.servers=localhost:7070
+
+#set display timezone on UI,format like[GMT+N or GMT-N]
+kylin.rest.timezone=GMT-8
+
+# The metadata store in hbase
+kylin.metadata.url=kylin_metadata@hbase
+
+# The storage for final cube file in hbase
+kylin.storage.url=hbase
+
+# Temp folder in hdfs, make sure user has the right access to the hdfs 
directory
+kylin.hdfs.working.dir=/kylin
+
+kylin.job.mapreduce.default.reduce.input.mb=500
+
+# If true, job engine will not assume that hadoop CLI reside on the same 
server as it self
+# you will have to specify kylin.job.remote.cli.hostname, 
kylin.job.remote.cli.username and kylin.job.remote.cli.password
+kylin.job.run.as.remote.cmd=false
+
+# Only necessary when kylin.job.run.as.remote.cmd=true
+kylin.job.remote.cli.hostname=
+
+# Only necessary when kylin.job.run.as.remote.cmd=true
+kylin.job.remote.cli.username=
+
+# Only necessary when kylin.job.run.as.remote.cmd=true
+kylin.job.remote.cli.password=
+
+# Used by test cases to prepare synthetic data for sample cube
+kylin.job.remote.cli.working.dir=/tmp/kylin
+
+# Max count of concurrent jobs running
+kylin.job.concurrent.max.limit=10
+
+# Time interval to check hadoop job status
+kylin.job.yarn.app.rest.check.interval.seconds=10
+
+# Hive database name for putting the intermediate flat tables
+kylin.job.hive.database.for.intermediatetable=default
+
+#default compression codec for htable,snappy,lzo,gzip,lz4
+kylin.hbase.default.compression.codec=gzip
+
+# The cut size for hbase region, in GB.
+# E.g, for cube whose capacity be marked as "SMALL", split region per 10GB by 
default
+kylin.hbase.region.cut.small=10
+kylin.hbase.region.cut.medium=20
+kylin.hbase.region.cut.large=100
+
+# HBase min and max region count
+kylin.hbase.region.count.min=1
+kylin.hbase.region.count.min=500
+
+## Config for Restful APP ##
+# database connection settings:
+ldap.server=
+ldap.username=
+ldap.password=
+ldap.user.searchBase=
+ldap.user.searchPattern=
+ldap.user.groupSearchBase=
+ldap.service.searchBase=OU=
+ldap.service.searchPattern=
+ldap.service.groupSearchBase=
+acl.adminRole=
+acl.defaultRole=
+ganglia.group=
+ganglia.port=8664
+
+## Config for mail service
+
+# If true, will send email notification;
+mail.enabled=false
+mail.host=
+mail.username=
+mail.password=
+mail.sender=
+
+###########################config info for web#######################
+
+#help info ,format{name|displayName|link} ,optional
+kylin.web.help.length=4
+kylin.web.help.0=start|Getting Started|
+kylin.web.help.1=odbc|ODBC Driver|
+kylin.web.help.2=tableau|Tableau Guide|
+kylin.web.help.3=onboard|Cube Design Tutorial|
+#hadoop url link ,optional
+kylin.web.hadoop=
+#job diagnostic url link ,optional
+kylin.web.diagnostic=
+#contact mail on web page ,optional
+kylin.web.contact_mail=
+
+###########################config info for front#######################
+
+#env DEV|QA|PROD
+deploy.env=DEV
+
+###########################config info for sandbox#######################
+kylin.sandbox=true
+
+
+###########################config info for kylin monitor#######################
+# hive jdbc url
+kylin.monitor.hive.jdbc.connection.url= jdbc:hive2://sandbox:10000
+
+#config where to parse query log,split with comma ,will also read 
$KYLIN_HOME/tomcat/logs/ by default
+kylin.monitor.ext.log.base.dir = /tmp/kylin_log1,/tmp/kylin_log2
+
+#will create external hive table to query result csv file
+#will set to kylin_query_log by default if not config here
+kylin.monitor.query.log.parse.result.table = kylin_query_log
+
+##### Config for enabling query rerouting to Hive. Disabled by default. ######
+kylin.route.hive.enabled=false
+
+# If query rerouting is enabled, provide the hive configurations
+# Default value for kylin.route.hive.url will be pointing to the embedded 
server (jdbc:hive2://)
+kylin.route.hive.url=
+kylin.route.hive.username=
+kylin.route.hive.password=

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/server/src/main/java/org/apache/kylin/rest/service/QueryService.java
----------------------------------------------------------------------
diff --git 
a/server/src/main/java/org/apache/kylin/rest/service/QueryService.java 
b/server/src/main/java/org/apache/kylin/rest/service/QueryService.java
index 6e6d9da..81905c8 100644
--- a/server/src/main/java/org/apache/kylin/rest/service/QueryService.java
+++ b/server/src/main/java/org/apache/kylin/rest/service/QueryService.java
@@ -40,6 +40,9 @@ import java.util.Set;
 
 import javax.sql.DataSource;
 
+import org.apache.commons.lang.exception.ExceptionUtils;
+import org.apache.calcite.sql.parser.impl.ParseException;
+import org.apache.calcite.sql.validate.SqlValidatorException;
 import org.apache.calcite.avatica.ColumnMetaData.Rep;
 import org.apache.commons.io.IOUtils;
 import org.apache.hadoop.hbase.client.Get;
@@ -62,6 +65,7 @@ import org.apache.kylin.rest.model.TableMeta;
 import org.apache.kylin.rest.request.PrepareSqlRequest;
 import org.apache.kylin.rest.request.SQLRequest;
 import org.apache.kylin.rest.response.SQLResponse;
+import org.apache.kylin.rest.util.HiveRerouteUtil;
 import org.apache.kylin.rest.util.QueryUtil;
 import org.apache.kylin.rest.util.Serializer;
 import org.slf4j.Logger;
@@ -372,7 +376,30 @@ public class QueryService extends BasicService {
                 results.add(new LinkedList<String>(oneRow));
                 oneRow.clear();
             }
-        } finally {
+        } catch (SQLException sqlException) {
+            // unsuccessful statement execution, retry with Hive on Spark. 
Code modification as part of the jira 
https://issues.apache.org/jira/browse/KYLIN-742
+           boolean isExpectedCause = 
(ExceptionUtils.getRootCause(sqlException).getClass().equals(SqlValidatorException.class))
 || 
(ExceptionUtils.getRootCause(sqlException).getClass().equals(ParseException.class));
+            KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
+           if (isExpectedCause && kylinConfig.isHiveReroutingEnabled()) {
+               logger.debug("query rerouting option enabled for Kylin");
+               // running query to hive
+                HiveRerouteUtil rerouteUtil = new HiveRerouteUtil();
+                try {
+                    conn = 
rerouteUtil.createConnection(kylinConfig.getHiveRerouteUrl(), 
kylinConfig.getHiveRerouteUsername(), kylinConfig.getHiveReroutePassword());
+                    resultSet = rerouteUtil.executQuery(conn, sql);
+                    columnMetas = rerouteUtil.extractColumnMetadata(resultSet, 
columnMetas);
+                    results = rerouteUtil.extractResults(resultSet, results);
+                } catch (Exception exception) {
+                    logger.error("exception in re-routing the query to hive", 
exception);
+                    throw exception;
+                } finally {
+                    rerouteUtil.closeConnection(conn);
+                } 
+            } else {
+                logger.error("exception in running the query: " + sql);
+               throw sqlException;
+           }
+           } finally {
             close(resultSet, stat, conn);
         }
 

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java
----------------------------------------------------------------------
diff --git 
a/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java 
b/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java
new file mode 100644
index 0000000..ca1e048
--- /dev/null
+++ b/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java
@@ -0,0 +1,133 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.kylin.rest.util;
+
+import java.sql.Connection;
+import java.sql.DriverManager;
+import java.sql.ResultSet;
+import java.sql.ResultSetMetaData;
+import java.sql.SQLException;
+import java.sql.Statement;
+import java.util.LinkedList;
+import java.util.List;
+
+import org.apache.kylin.rest.model.SelectedColumnMeta;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/** @author murkrishn **/
+public class HiveRerouteUtil {
+
+    private static final Logger logger = 
LoggerFactory.getLogger(HiveRerouteUtil.class);
+    public static final String driverName = "org.apache.hive.jdbc.HiveDriver";
+
+    /**
+     * Create a connection to the Hive server by passing the required 
connection parameters.
+     * @param connectionURL: JDBC URL to connect to the Hive server
+     * @param username: Username to connect with (optional)
+     * @param password: Password to connect with (optional)
+     * @return: Connection object to the Hive server
+     * @throws Exception
+     */
+    public Connection createConnection (String connectionURL, String username, 
String password) throws Exception {
+        logger.info("rerouting to : " + connectionURL + " for executing the 
query");
+        
+        try {
+            Class.forName(driverName);
+        } catch (ClassNotFoundException classNotFoundException) {
+            throw classNotFoundException;
+        }
+        
+        Connection connection = DriverManager.getConnection(connectionURL, 
username, password);
+        return connection;
+    }
+    
+    /**
+     * Close the connection to the Hive server.
+     * @param connection: Connection object to be closed
+     */
+    public void closeConnection(Connection connection) {
+        if (null != connection) {
+            try {
+                connection.close();
+            } catch (SQLException sqlException) {
+                logger.error("failed to close connection", sqlException);
+            }
+        }
+    }
+    
+    /**
+     * Execute a query in Hive.
+     * @param connection: Connection object to the Hive server
+     * @param query: Query to be executed
+     * @return: ResultSet object of the query executed
+     * @throws Exception
+     */
+    public ResultSet executQuery (Connection connection, String query) throws 
Exception {
+        Statement statement = null;
+        ResultSet resultSet = null;
+        
+        try {
+            statement = connection.createStatement();
+            resultSet = statement.executeQuery(query);
+            return resultSet;
+        } catch (SQLException sqlException) {
+            throw sqlException;
+        }
+    }
+    
+    public List<SelectedColumnMeta> extractColumnMetadata (ResultSet 
resultSet, List<SelectedColumnMeta> columnMetas) throws SQLException {
+        ResultSetMetaData metaData = null;
+        int columnCount = 0;
+        
+        try {
+            metaData = resultSet.getMetaData();
+            columnCount = metaData.getColumnCount();
+            
+            // fill in selected column meta
+            for (int i = 1; i <= columnCount; ++i) {
+                columnMetas.add(new 
SelectedColumnMeta(metaData.isAutoIncrement(i), metaData.isCaseSensitive(i), 
false, metaData.isCurrency(i), metaData.isNullable(i), false, 
metaData.getColumnDisplaySize(i), metaData.getColumnLabel(i), 
metaData.getColumnName(i), null, null, null, metaData.getPrecision(i), 
metaData.getScale(i), metaData.getColumnType(i), metaData.getColumnTypeName(i), 
metaData.isReadOnly(i), false, false));
+            }
+            
+            return columnMetas;
+        } catch (SQLException sqlException) {
+            throw sqlException;
+        }
+    }
+    
+    public List<List<String>> extractResults (ResultSet resultSet, 
List<List<String>> results) throws SQLException {
+        List<String> oneRow = new LinkedList<String>();
+        
+        try {
+            while (resultSet.next()) {
+                //logger.debug("resultSet value: " + resultSet.getString(1));
+                for (int i = 0; i < resultSet.getMetaData().getColumnCount(); 
i++) {
+                    oneRow.add((resultSet.getString(i + 1)));
+                }
+
+                results.add(new LinkedList<String>(oneRow));
+                oneRow.clear();
+            }
+            
+            return results;
+        } catch (SQLException sqlException) {
+            throw sqlException;
+        }
+    }
+}
\ No newline at end of file

[1/2] incubator-kylin git commit: KYLIN-742 Route unsupported queries to Hive

Reply via email to