Repository: incubator-kylin Updated Branches: refs/heads/1.x-staging 67322634a -> 143aa608b
KYLIN-742 Route unsupported queries to Hive Signed-off-by: Li, Yang <yang...@ebay.com> Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/79904380 Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/79904380 Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/79904380 Branch: refs/heads/1.x-staging Commit: 7990438023e898f1f01a62185f5607b72ef82643 Parents: 6732263 Author: murkrishn <murkris...@ebay.com> Authored: Mon Sep 14 23:51:39 2015 -0700 Committer: Li, Yang <yang...@ebay.com> Committed: Tue Sep 29 15:40:26 2015 +0800 ---------------------------------------------------------------------- .../org/apache/kylin/common/KylinConfig.java | 27 ++ .../test_case_data/sandbox/kylin.properties | 278 ++++++++++--------- .../apache/kylin/rest/service/QueryService.java | 29 +- .../apache/kylin/rest/util/HiveRerouteUtil.java | 133 +++++++++ 4 files changed, 330 insertions(+), 137 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/common/src/main/java/org/apache/kylin/common/KylinConfig.java ---------------------------------------------------------------------- diff --git a/common/src/main/java/org/apache/kylin/common/KylinConfig.java b/common/src/main/java/org/apache/kylin/common/KylinConfig.java index d3220ee..72b6a65 100644 --- a/common/src/main/java/org/apache/kylin/common/KylinConfig.java +++ b/common/src/main/java/org/apache/kylin/common/KylinConfig.java @@ -132,6 +132,15 @@ public class KylinConfig { public static final String HBASE_REGION_COUNT_MIN = "kylin.hbase.region.count.min"; public static final String HBASE_REGION_COUNT_MAX = "kylin.hbase.region.count.max"; + + // property for Hive query rerouting enablement + public static final String KYLIN_ROUTE_HIVE_ENABLED = "kylin.route.hive.enabled"; + public static final boolean KYLIN_ROUTE_HIVE_ENABLED_DEFAULT = false; + // JDBC Hive connection details for query rerouting + public static final String KYLIN_ROUTE_HIVE_URL = "kylin.route.hive.url"; + public static final String KYLIN_ROUTE_HIVE_URL_DEFAULT = "jdbc:hive2://sandbox:10000"; + public static final String KYLIN_ROUTE_HIVE_USERNAME = "kylin.route.hive.username"; + public static final String KYLIN_ROUTE_HIVE_PASSWORD = "kylin.route.hive.password"; // static cached instances private static KylinConfig ENV_INSTANCE = null; @@ -265,6 +274,24 @@ public class KylinConfig { // ============================================================================ + // start: properties for Hive rerouting + public boolean isHiveReroutingEnabled() { + return Boolean.parseBoolean(getOptional(KYLIN_ROUTE_HIVE_ENABLED)); + } + + public String getHiveRerouteUrl() { + return getOptional(KYLIN_ROUTE_HIVE_URL, KYLIN_ROUTE_HIVE_URL_DEFAULT); + } + + public String getHiveRerouteUsername() { + return getOptional(KYLIN_ROUTE_HIVE_USERNAME, ""); + } + + public String getHiveReroutePassword() { + return getOptional(KYLIN_ROUTE_HIVE_PASSWORD, ""); + } + // end: properties for JDBC Hive rerouting + public String getStorageUrl() { return storageUrl; } http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/examples/test_case_data/sandbox/kylin.properties ---------------------------------------------------------------------- diff --git a/examples/test_case_data/sandbox/kylin.properties b/examples/test_case_data/sandbox/kylin.properties index bcae574..6fe63a8 100644 --- a/examples/test_case_data/sandbox/kylin.properties +++ b/examples/test_case_data/sandbox/kylin.properties @@ -1,136 +1,142 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -## Config for Kylin Engine ## - -# List of web servers in use, this enables one web server instance to sync up with other servers. -kylin.rest.servers=localhost:7070 - -#set display timezone on UI,format like[GMT+N or GMT-N] -kylin.rest.timezone=GMT-8 - -# The metadata store in hbase -kylin.metadata.url=kylin_metadata@hbase - -# The storage for final cube file in hbase -kylin.storage.url=hbase - -# Temp folder in hdfs, make sure user has the right access to the hdfs directory -kylin.hdfs.working.dir=/kylin - -kylin.job.mapreduce.default.reduce.input.mb=500 - -# If true, job engine will not assume that hadoop CLI reside on the same server as it self -# you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password -kylin.job.run.as.remote.cmd=false - -# Only necessary when kylin.job.run.as.remote.cmd=true -kylin.job.remote.cli.hostname= - -# Only necessary when kylin.job.run.as.remote.cmd=true -kylin.job.remote.cli.username= - -# Only necessary when kylin.job.run.as.remote.cmd=true -kylin.job.remote.cli.password= - -# Used by test cases to prepare synthetic data for sample cube -kylin.job.remote.cli.working.dir=/tmp/kylin - -# Max count of concurrent jobs running -kylin.job.concurrent.max.limit=10 - -# Time interval to check hadoop job status -kylin.job.yarn.app.rest.check.interval.seconds=10 - -# Hive database name for putting the intermediate flat tables -kylin.job.hive.database.for.intermediatetable=default - -#default compression codec for htable,snappy,lzo,gzip,lz4 -kylin.hbase.default.compression.codec=gzip - -# The cut size for hbase region, in GB. -# E.g, for cube whose capacity be marked as "SMALL", split region per 10GB by default -kylin.hbase.region.cut.small=10 -kylin.hbase.region.cut.medium=20 -kylin.hbase.region.cut.large=100 - -# HBase min and max region count -kylin.hbase.region.count.min=1 -kylin.hbase.region.count.min=500 - -## Config for Restful APP ## -# database connection settings: -ldap.server= -ldap.username= -ldap.password= -ldap.user.searchBase= -ldap.user.searchPattern= -ldap.user.groupSearchBase= -ldap.service.searchBase=OU= -ldap.service.searchPattern= -ldap.service.groupSearchBase= -acl.adminRole= -acl.defaultRole= -ganglia.group= -ganglia.port=8664 - -## Config for mail service - -# If true, will send email notification; -mail.enabled=false -mail.host= -mail.username= -mail.password= -mail.sender= - -###########################config info for web####################### - -#help info ,format{name|displayName|link} ,optional -kylin.web.help.length=4 -kylin.web.help.0=start|Getting Started| -kylin.web.help.1=odbc|ODBC Driver| -kylin.web.help.2=tableau|Tableau Guide| -kylin.web.help.3=onboard|Cube Design Tutorial| -#hadoop url link ,optional -kylin.web.hadoop= -#job diagnostic url link ,optional -kylin.web.diagnostic= -#contact mail on web page ,optional -kylin.web.contact_mail= - -###########################config info for front####################### - -#env DEV|QA|PROD -deploy.env=DEV - -###########################config info for sandbox####################### -kylin.sandbox=true - - -###########################config info for kylin monitor####################### -# hive jdbc url -kylin.monitor.hive.jdbc.connection.url= jdbc:hive2://sandbox:10000 - -#config where to parse query log,split with comma ,will also read $KYLIN_HOME/tomcat/logs/ by default -kylin.monitor.ext.log.base.dir = /tmp/kylin_log1,/tmp/kylin_log2 - -#will create external hive table to query result csv file -#will set to kylin_query_log by default if not config here -kylin.monitor.query.log.parse.result.table = kylin_query_log - - - +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +## Config for Kylin Engine ## + +# List of web servers in use, this enables one web server instance to sync up with other servers. +kylin.rest.servers=localhost:7070 + +#set display timezone on UI,format like[GMT+N or GMT-N] +kylin.rest.timezone=GMT-8 + +# The metadata store in hbase +kylin.metadata.url=kylin_metadata@hbase + +# The storage for final cube file in hbase +kylin.storage.url=hbase + +# Temp folder in hdfs, make sure user has the right access to the hdfs directory +kylin.hdfs.working.dir=/kylin + +kylin.job.mapreduce.default.reduce.input.mb=500 + +# If true, job engine will not assume that hadoop CLI reside on the same server as it self +# you will have to specify kylin.job.remote.cli.hostname, kylin.job.remote.cli.username and kylin.job.remote.cli.password +kylin.job.run.as.remote.cmd=false + +# Only necessary when kylin.job.run.as.remote.cmd=true +kylin.job.remote.cli.hostname= + +# Only necessary when kylin.job.run.as.remote.cmd=true +kylin.job.remote.cli.username= + +# Only necessary when kylin.job.run.as.remote.cmd=true +kylin.job.remote.cli.password= + +# Used by test cases to prepare synthetic data for sample cube +kylin.job.remote.cli.working.dir=/tmp/kylin + +# Max count of concurrent jobs running +kylin.job.concurrent.max.limit=10 + +# Time interval to check hadoop job status +kylin.job.yarn.app.rest.check.interval.seconds=10 + +# Hive database name for putting the intermediate flat tables +kylin.job.hive.database.for.intermediatetable=default + +#default compression codec for htable,snappy,lzo,gzip,lz4 +kylin.hbase.default.compression.codec=gzip + +# The cut size for hbase region, in GB. +# E.g, for cube whose capacity be marked as "SMALL", split region per 10GB by default +kylin.hbase.region.cut.small=10 +kylin.hbase.region.cut.medium=20 +kylin.hbase.region.cut.large=100 + +# HBase min and max region count +kylin.hbase.region.count.min=1 +kylin.hbase.region.count.min=500 + +## Config for Restful APP ## +# database connection settings: +ldap.server= +ldap.username= +ldap.password= +ldap.user.searchBase= +ldap.user.searchPattern= +ldap.user.groupSearchBase= +ldap.service.searchBase=OU= +ldap.service.searchPattern= +ldap.service.groupSearchBase= +acl.adminRole= +acl.defaultRole= +ganglia.group= +ganglia.port=8664 + +## Config for mail service + +# If true, will send email notification; +mail.enabled=false +mail.host= +mail.username= +mail.password= +mail.sender= + +###########################config info for web####################### + +#help info ,format{name|displayName|link} ,optional +kylin.web.help.length=4 +kylin.web.help.0=start|Getting Started| +kylin.web.help.1=odbc|ODBC Driver| +kylin.web.help.2=tableau|Tableau Guide| +kylin.web.help.3=onboard|Cube Design Tutorial| +#hadoop url link ,optional +kylin.web.hadoop= +#job diagnostic url link ,optional +kylin.web.diagnostic= +#contact mail on web page ,optional +kylin.web.contact_mail= + +###########################config info for front####################### + +#env DEV|QA|PROD +deploy.env=DEV + +###########################config info for sandbox####################### +kylin.sandbox=true + + +###########################config info for kylin monitor####################### +# hive jdbc url +kylin.monitor.hive.jdbc.connection.url= jdbc:hive2://sandbox:10000 + +#config where to parse query log,split with comma ,will also read $KYLIN_HOME/tomcat/logs/ by default +kylin.monitor.ext.log.base.dir = /tmp/kylin_log1,/tmp/kylin_log2 + +#will create external hive table to query result csv file +#will set to kylin_query_log by default if not config here +kylin.monitor.query.log.parse.result.table = kylin_query_log + +##### Config for enabling query rerouting to Hive. Disabled by default. ###### +kylin.route.hive.enabled=false + +# If query rerouting is enabled, provide the hive configurations +# Default value for kylin.route.hive.url will be pointing to the embedded server (jdbc:hive2://) +kylin.route.hive.url= +kylin.route.hive.username= +kylin.route.hive.password= http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/server/src/main/java/org/apache/kylin/rest/service/QueryService.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/kylin/rest/service/QueryService.java b/server/src/main/java/org/apache/kylin/rest/service/QueryService.java index 6e6d9da..81905c8 100644 --- a/server/src/main/java/org/apache/kylin/rest/service/QueryService.java +++ b/server/src/main/java/org/apache/kylin/rest/service/QueryService.java @@ -40,6 +40,9 @@ import java.util.Set; import javax.sql.DataSource; +import org.apache.commons.lang.exception.ExceptionUtils; +import org.apache.calcite.sql.parser.impl.ParseException; +import org.apache.calcite.sql.validate.SqlValidatorException; import org.apache.calcite.avatica.ColumnMetaData.Rep; import org.apache.commons.io.IOUtils; import org.apache.hadoop.hbase.client.Get; @@ -62,6 +65,7 @@ import org.apache.kylin.rest.model.TableMeta; import org.apache.kylin.rest.request.PrepareSqlRequest; import org.apache.kylin.rest.request.SQLRequest; import org.apache.kylin.rest.response.SQLResponse; +import org.apache.kylin.rest.util.HiveRerouteUtil; import org.apache.kylin.rest.util.QueryUtil; import org.apache.kylin.rest.util.Serializer; import org.slf4j.Logger; @@ -372,7 +376,30 @@ public class QueryService extends BasicService { results.add(new LinkedList<String>(oneRow)); oneRow.clear(); } - } finally { + } catch (SQLException sqlException) { + // unsuccessful statement execution, retry with Hive on Spark. Code modification as part of the jira https://issues.apache.org/jira/browse/KYLIN-742 + boolean isExpectedCause = (ExceptionUtils.getRootCause(sqlException).getClass().equals(SqlValidatorException.class)) || (ExceptionUtils.getRootCause(sqlException).getClass().equals(ParseException.class)); + KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv(); + if (isExpectedCause && kylinConfig.isHiveReroutingEnabled()) { + logger.debug("query rerouting option enabled for Kylin"); + // running query to hive + HiveRerouteUtil rerouteUtil = new HiveRerouteUtil(); + try { + conn = rerouteUtil.createConnection(kylinConfig.getHiveRerouteUrl(), kylinConfig.getHiveRerouteUsername(), kylinConfig.getHiveReroutePassword()); + resultSet = rerouteUtil.executQuery(conn, sql); + columnMetas = rerouteUtil.extractColumnMetadata(resultSet, columnMetas); + results = rerouteUtil.extractResults(resultSet, results); + } catch (Exception exception) { + logger.error("exception in re-routing the query to hive", exception); + throw exception; + } finally { + rerouteUtil.closeConnection(conn); + } + } else { + logger.error("exception in running the query: " + sql); + throw sqlException; + } + } finally { close(resultSet, stat, conn); } http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/79904380/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java ---------------------------------------------------------------------- diff --git a/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java b/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java new file mode 100644 index 0000000..ca1e048 --- /dev/null +++ b/server/src/main/java/org/apache/kylin/rest/util/HiveRerouteUtil.java @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.kylin.rest.util; + +import java.sql.Connection; +import java.sql.DriverManager; +import java.sql.ResultSet; +import java.sql.ResultSetMetaData; +import java.sql.SQLException; +import java.sql.Statement; +import java.util.LinkedList; +import java.util.List; + +import org.apache.kylin.rest.model.SelectedColumnMeta; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +/** @author murkrishn **/ +public class HiveRerouteUtil { + + private static final Logger logger = LoggerFactory.getLogger(HiveRerouteUtil.class); + public static final String driverName = "org.apache.hive.jdbc.HiveDriver"; + + /** + * Create a connection to the Hive server by passing the required connection parameters. + * @param connectionURL: JDBC URL to connect to the Hive server + * @param username: Username to connect with (optional) + * @param password: Password to connect with (optional) + * @return: Connection object to the Hive server + * @throws Exception + */ + public Connection createConnection (String connectionURL, String username, String password) throws Exception { + logger.info("rerouting to : " + connectionURL + " for executing the query"); + + try { + Class.forName(driverName); + } catch (ClassNotFoundException classNotFoundException) { + throw classNotFoundException; + } + + Connection connection = DriverManager.getConnection(connectionURL, username, password); + return connection; + } + + /** + * Close the connection to the Hive server. + * @param connection: Connection object to be closed + */ + public void closeConnection(Connection connection) { + if (null != connection) { + try { + connection.close(); + } catch (SQLException sqlException) { + logger.error("failed to close connection", sqlException); + } + } + } + + /** + * Execute a query in Hive. + * @param connection: Connection object to the Hive server + * @param query: Query to be executed + * @return: ResultSet object of the query executed + * @throws Exception + */ + public ResultSet executQuery (Connection connection, String query) throws Exception { + Statement statement = null; + ResultSet resultSet = null; + + try { + statement = connection.createStatement(); + resultSet = statement.executeQuery(query); + return resultSet; + } catch (SQLException sqlException) { + throw sqlException; + } + } + + public List<SelectedColumnMeta> extractColumnMetadata (ResultSet resultSet, List<SelectedColumnMeta> columnMetas) throws SQLException { + ResultSetMetaData metaData = null; + int columnCount = 0; + + try { + metaData = resultSet.getMetaData(); + columnCount = metaData.getColumnCount(); + + // fill in selected column meta + for (int i = 1; i <= columnCount; ++i) { + columnMetas.add(new SelectedColumnMeta(metaData.isAutoIncrement(i), metaData.isCaseSensitive(i), false, metaData.isCurrency(i), metaData.isNullable(i), false, metaData.getColumnDisplaySize(i), metaData.getColumnLabel(i), metaData.getColumnName(i), null, null, null, metaData.getPrecision(i), metaData.getScale(i), metaData.getColumnType(i), metaData.getColumnTypeName(i), metaData.isReadOnly(i), false, false)); + } + + return columnMetas; + } catch (SQLException sqlException) { + throw sqlException; + } + } + + public List<List<String>> extractResults (ResultSet resultSet, List<List<String>> results) throws SQLException { + List<String> oneRow = new LinkedList<String>(); + + try { + while (resultSet.next()) { + //logger.debug("resultSet value: " + resultSet.getString(1)); + for (int i = 0; i < resultSet.getMetaData().getColumnCount(); i++) { + oneRow.add((resultSet.getString(i + 1))); + } + + results.add(new LinkedList<String>(oneRow)); + oneRow.clear(); + } + + return results; + } catch (SQLException sqlException) { + throw sqlException; + } + } +} \ No newline at end of file