[ https://issues.apache.org/jira/browse/HIVE-21456?focusedWorklogId=748963&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-748963 ]
ASF GitHub Bot logged work on HIVE-21456:
-----------------------------------------

                Author: ASF GitHub Bot
            Created on: 28/Mar/22 21:45
            Start Date: 28/Mar/22 21:45
    Worklog Time Spent: 10m
      Work Description: nrg4878 commented on a change in pull request #3105:
URL: https://github.com/apache/hive/pull/3105#discussion_r828072598


##########
File path: standalone-metastore/pom.xml
##########
@@ -103,6 +103,7 @@
     <spotbugs.version>4.0.3</spotbugs.version>
     <caffeine.version>2.8.4</caffeine.version>
     <slf4j.version>1.7.30</slf4j.version>
+    <httpcomponents.core.version>4.4.10</httpcomponents.core.version>

Review comment:
       This seems to be an older version compared to what HS2 uses. Can we at least use the same version as HS2? This will probably address some known CVEs.

##########
File path: standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestRemoteHiveHttpMetaStore.java
##########
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+
+import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
+
+@Category(MetastoreCheckinTest.class)
+public class TestRemoteHiveHttpMetaStore extends TestRemoteHiveMetaStore {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestRemoteHiveHttpMetaStore.class);
+
+  @Override
+  public void start() throws Exception {
+    MetastoreConf.setVar(conf, ConfVars.THRIFT_TRANSPORT_MODE, "http");
+    //MetastoreConf.setBoolVar(conf, ConfVars.USE_SSL, true);
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYSTORE_PATH, "/home/sourabh/src/certs1/keystore");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYSTORE_PASSWORD, "password");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYSTORE_TYPE, "pkcs12");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYMANAGERFACTORY_ALGORITHM, "SunX509");

Review comment:
       nit: delete dead code.
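Applying that nit, the override reduces to the transport switch itself. A minimal sketch follows; the "Sketch" class name marks it as illustrative, and it assumes the conf field inherited from TestRemoteHiveMetaStore, exactly as in the quoted class:

    package org.apache.hadoop.hive.metastore;

    import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
    import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
    import org.slf4j.Logger;
    import org.slf4j.LoggerFactory;

    public class TestRemoteHiveHttpMetaStoreSketch extends TestRemoteHiveMetaStore {

      private static final Logger LOG =
          LoggerFactory.getLogger(TestRemoteHiveHttpMetaStoreSketch.class);

      @Override
      public void start() throws Exception {
        // Only the server transport mode differs from the binary-transport parent test;
        // SSL, when a test needs it, can be enabled through USE_SSL and the keystore
        // config vars rather than kept around as commented-out lines.
        MetastoreConf.setVar(conf, ConfVars.THRIFT_TRANSPORT_MODE, "http");
        LOG.info("Attempting to start test remote metastore");
        super.start();
        LOG.info("Successfully started test remote metastore");
      }
    }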
##########
File path: itests/hive-unit/src/test/java/org/apache/hive/jdbc/TestSSL.java
##########
@@ -437,15 +439,36 @@ public void testConnectionWrongCertCN() throws Exception {
    * Test HMS server with SSL
    * @throws Exception
    */
+  @Ignore
   @Test
   public void testMetastoreWithSSL() throws Exception {
     testSSLHMS(true);
   }
 
+  /**
+   * Test HMS server with Http + SSL
+   * @throws Exception
+   */
+  @Test
+  public void testMetastoreWithHttps() throws Exception {
+    // MetastoreConf.setBoolVar(conf, MetastoreConf.ConfVars.EVENT_DB_NOTIFICATION_API_AUTH, false);
+    //MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METASTORE_CLIENT_TRANSPORT_MODE, "http");

Review comment:
       nit: delete commented code

##########
File path: standalone-metastore/metastore-server/src/test/java/org/apache/hadoop/hive/metastore/TestRemoteHiveHttpMetaStore.java
##########
@@ -0,0 +1,57 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.metastore;
+
+import org.apache.hadoop.hive.metastore.annotation.MetastoreUnitTest;
+import org.junit.experimental.categories.Category;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import org.apache.hadoop.hive.metastore.annotation.MetastoreCheckinTest;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
+import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;
+
+@Category(MetastoreCheckinTest.class)
+public class TestRemoteHiveHttpMetaStore extends TestRemoteHiveMetaStore {
+
+  private static final Logger LOG = LoggerFactory.getLogger(TestRemoteHiveHttpMetaStore.class);
+
+  @Override
+  public void start() throws Exception {
+    MetastoreConf.setVar(conf, ConfVars.THRIFT_TRANSPORT_MODE, "http");
+    //MetastoreConf.setBoolVar(conf, ConfVars.USE_SSL, true);
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYSTORE_PATH, "/home/sourabh/src/certs1/keystore");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYSTORE_PASSWORD, "password");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYSTORE_TYPE, "pkcs12");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_KEYMANAGERFACTORY_ALGORITHM, "SunX509");
+    LOG.info("Attempting to start test remote metastore");
+    super.start();
+    LOG.info("Successfully started test remote metastore");
+  }
+
+  @Override
+  protected HiveMetaStoreClient createClient() throws Exception {
+    MetastoreConf.setVar(conf, ConfVars.METASTORE_CLIENT_THRIFT_TRANSPORT_MODE, "http");
+    //MetastoreConf.setBoolVar(conf, ConfVars.USE_SSL, true);
+    //MetastoreConf.setVar(conf, ConfVars.SSL_TRUSTSTORE_PATH, "/home/sourabh/src/certs1/truststore");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_TRUSTSTORE_PASSWORD, "password");
+    //MetastoreConf.setVar(conf, ConfVars.SSL_TRUSTMANAGERFACTORY_ALGORITHM, "SunX509");

Review comment:
       nit: delete dead code
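To put the new client-side switch in context, here is a minimal standalone sketch of an HMS client speaking Thrift over HTTP. The endpoint is a placeholder, and the ConfVars names are taken from this patch, so they may differ in the final version:

    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
    import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
    import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars;

    public class HttpMetastoreClientSketch {
      public static void main(String[] args) throws Exception {
        Configuration conf = MetastoreConf.newMetastoreConf();
        // Placeholder endpoint; point this at a metastore started with
        // metastore.server.thrift.transport.mode=http.
        MetastoreConf.setVar(conf, ConfVars.THRIFT_URIS, "thrift://hms.example.com:9083");
        // Client-side switch added by this patch: speak Thrift over HTTP.
        MetastoreConf.setVar(conf, ConfVars.METASTORE_CLIENT_THRIFT_TRANSPORT_MODE, "http");

        HiveMetaStoreClient client = new HiveMetaStoreClient(conf);
        try {
          List<String> databases = client.getAllDatabases();
          databases.forEach(System.out::println);
        } finally {
          client.close();
        }
      }
    }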
##########
File path: standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
##########
@@ -1356,6 +1356,13 @@ public static ConfVars getMetaConf(String name) {
         "Comma-separated list of tasks that will be started in separate threads. These will be" +
             " started only when the metastore is running as a separate service. They must " +
             "implement " + METASTORE_TASK_THREAD_CLASS),
+    THRIFT_TRANSPORT_MODE("metastore.server.thrift.transport.mode",
+        "hive.metastore.server.thrift.transport.mode", "binary",
+        "Transport mode for thrift server in Metastore. Can be binary or http"),
+    THRIFT_HTTP_PATH("metastore.server.thrift.http.path",

Review comment:
       So this config is not related to the transport. It's a path used for routing the service requests. I am ok leaving it as-is, but if this is inconsistent with the HS2-side configuration for its http support, then I agree with you.

##########
File path: standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
##########
@@ -343,21 +366,162 @@ public static void startMetaStore(int port, HadoopThriftAuthBridge bridge,
     startMetaStore(port, bridge, conf, false, null);
   }
 
-  /**
-   * Start Metastore based on a passed {@link HadoopThriftAuthBridge}.
-   *
-   * @param port The port on which the Thrift server will start to serve
-   * @param bridge
-   * @param conf Configuration overrides
-   * @param startMetaStoreThreads Start the background threads (initiator, cleaner, statsupdater, etc.)
-   * @param startedBackgroundThreads If startMetaStoreThreads is true, this AtomicBoolean will be switched to true,
-   *                                 when all of the background threads are scheduled. Useful for testing purposes to wait
-   *                                 until the MetaStore is fully initialized.
-   * @throws Throwable
-   */
-  public static void startMetaStore(int port, HadoopThriftAuthBridge bridge,
-      Configuration conf, boolean startMetaStoreThreads, AtomicBoolean startedBackgroundThreads) throws Throwable {
-    isMetaStoreRemote = true;
+  public static boolean isThriftServerRunning() {
+    return thriftServer != null && thriftServer.isRunning();
+  }
+
+  // TODO: Is it worth trying to use a server that supports HTTP/2?
+  // Does the Thrift http client support this?
+
+  public static ThriftServer startHttpMetastore(int port, Configuration conf)
+      throws Exception {
+    LOG.info("Attempting to start http metastore server on port: {}", port);
+
+    // This check is likely pointless, especially with the current state of the http
+    // servlet which respects whatever comes in. Putting this in place for the moment
+    // only to enable testing on an otherwise secure cluster.
+    LOG.info(" Checking if security is enabled");
+    if (UserGroupInformation.isSecurityEnabled()) {
+      LOG.info("Logging in via keytab while starting HTTP metastore");
+      // Handle renewal
+      String kerberosName = SecurityUtil.getServerPrincipal(MetastoreConf.getVar(conf, ConfVars.KERBEROS_PRINCIPAL), "0.0.0.0");
+      String keyTabFile = MetastoreConf.getVar(conf, ConfVars.KERBEROS_KEYTAB_FILE);
+      UserGroupInformation.loginUserFromKeytab(kerberosName, keyTabFile);
+    } else {
+      LOG.info("Security is not enabled. Not logging in via keytab");
+    }
+
+    // TODO Bunch of http specific variables need to be defined here.
+    long maxMessageSize = MetastoreConf.getLongVar(conf, ConfVars.SERVER_MAX_MESSAGE_SIZE);
+    int minWorkerThreads = MetastoreConf.getIntVar(conf, ConfVars.SERVER_MIN_THREADS);
+    int maxWorkerThreads = MetastoreConf.getIntVar(conf, ConfVars.SERVER_MAX_THREADS);
+
+    boolean useCompactProtocol = MetastoreConf.getBoolVar(conf, ConfVars.USE_THRIFT_COMPACT_PROTOCOL);
+
+    // Server thread pool
+    // Start with minWorkerThreads, expand till maxWorkerThreads and reject
+    // subsequent requests
+    String threadPoolName = "HiveServer2-HttpHandler-Pool";
+    ExecutorService executorService = new ThreadPoolExecutor(
+        minWorkerThreads, maxWorkerThreads, 60, TimeUnit.SECONDS,
+        new SynchronousQueue<>());
+
+    ExecutorThreadPool threadPool = new ExecutorThreadPool((ThreadPoolExecutor) executorService);
+
+    // HTTP Server
+    org.eclipse.jetty.server.Server server = new Server(threadPool);
+    server.setStopAtShutdown(true);
+
+    ServerConnector connector;
+
+    final HttpConfiguration httpServerConf = new HttpConfiguration();
+    // TODO: Read from Configuration

Review comment:
       nit: looks like this TODO is done. Can we delete the comment?

##########
File path: itests/hive-unit/src/main/java/org/hadoop/hive/jdbc/SSLTestUtils.java
##########
@@ -67,6 +67,12 @@ public static void setMetastoreSslConf(HiveConf conf) {
         KEY_STORE_TRUST_STORE_PASSWORD);
   }
 
+  public static void setMetastoreHttpsConf(HiveConf conf) {
+    setMetastoreSslConf(conf);
+    MetastoreConf.setVar(conf, MetastoreConf.ConfVars.TRANSPORT_MODE, "http");

Review comment:
       +1
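Reading the +1 as agreement that the helper should use the transport-mode variables this patch actually introduces (the earlier comment it endorses is not quoted here), a hedged sketch of how the helper could look; the class name is illustrative:

    import org.apache.hadoop.hive.conf.HiveConf;
    import org.apache.hadoop.hive.metastore.conf.MetastoreConf;
    import org.hadoop.hive.jdbc.SSLTestUtils;

    public final class MetastoreHttpsConfSketch {

      private MetastoreHttpsConfSketch() {
      }

      // Illustrative variant of setMetastoreHttpsConf(): reuse the existing SSL setup
      // and flip both the server and the embedded client to the HTTP transport using
      // the config vars added by this patch.
      public static void setMetastoreHttpsConf(HiveConf conf) {
        SSLTestUtils.setMetastoreSslConf(conf);
        MetastoreConf.setVar(conf, MetastoreConf.ConfVars.THRIFT_TRANSPORT_MODE, "http");
        MetastoreConf.setVar(conf, MetastoreConf.ConfVars.METASTORE_CLIENT_THRIFT_TRANSPORT_MODE, "http");
      }
    }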
+ LOG.info(" Checking if security is enabled"); + if (UserGroupInformation.isSecurityEnabled()) { + LOG.info("Logging in via keytab while starting HTTP metastore"); + // Handle renewal + String kerberosName = SecurityUtil.getServerPrincipal(MetastoreConf.getVar(conf, ConfVars.KERBEROS_PRINCIPAL), "0.0.0.0"); + String keyTabFile = MetastoreConf.getVar(conf, ConfVars.KERBEROS_KEYTAB_FILE); + UserGroupInformation.loginUserFromKeytab(kerberosName, keyTabFile); + } else { + LOG.info("Security is not enabled. Not logging in via keytab"); + } + + // TODO Bunch of http specific variables need to be defined here. + long maxMessageSize = MetastoreConf.getLongVar(conf, ConfVars.SERVER_MAX_MESSAGE_SIZE); + int minWorkerThreads = MetastoreConf.getIntVar(conf, ConfVars.SERVER_MIN_THREADS); + int maxWorkerThreads = MetastoreConf.getIntVar(conf, ConfVars.SERVER_MAX_THREADS); + + boolean useCompactProtocol = MetastoreConf.getBoolVar(conf, ConfVars.USE_THRIFT_COMPACT_PROTOCOL); + + // Server thread pool + // Start with minWorkerThreads, expand till maxWorkerThreads and reject + // subsequent requests + String threadPoolName = "HiveServer2-HttpHandler-Pool"; + ExecutorService executorService = new ThreadPoolExecutor( + minWorkerThreads, maxWorkerThreads, 60, TimeUnit.SECONDS, + new SynchronousQueue<>()); + + ExecutorThreadPool threadPool = new ExecutorThreadPool((ThreadPoolExecutor) executorService); + + // HTTP Server + org.eclipse.jetty.server.Server server = new Server(threadPool); + server.setStopAtShutdown(true); + + ServerConnector connector; + + final HttpConfiguration httpServerConf = new HttpConfiguration(); + // TODO: Read from Configuration + httpServerConf.setRequestHeaderSize( + MetastoreConf.getIntVar(conf, ConfVars.METASTORE_THRIFT_HTTP_REQUEST_HEADER_SIZE)); + httpServerConf.setResponseHeaderSize( + MetastoreConf.getIntVar(conf, ConfVars.METASTORE_THRIFT_HTTP_RESPONSE_HEADER_SIZE)); + + final HttpConnectionFactory http = new HttpConnectionFactory(httpServerConf); + + boolean useSsl = MetastoreConf.getBoolVar(conf, ConfVars.USE_SSL); + String schemeName = useSsl ? "https" : "http"; + if (useSsl) { + String keyStorePath = MetastoreConf.getVar(conf, ConfVars.SSL_KEYSTORE_PATH).trim(); + if (keyStorePath.isEmpty()) { + throw new IllegalArgumentException(ConfVars.SSL_KEYSTORE_PATH.toString() + + " Not configured for SSL connection"); + } + String keyStorePassword = + MetastoreConf.getPassword(conf, MetastoreConf.ConfVars.SSL_KEYSTORE_PASSWORD); + String keyStoreType = + MetastoreConf.getVar(conf, ConfVars.SSL_KEYSTORE_TYPE).trim(); + String keyStoreAlgorithm = + MetastoreConf.getVar(conf, ConfVars.SSL_KEYMANAGERFACTORY_ALGORITHM).trim(); + + SslContextFactory sslContextFactory = new SslContextFactory(); + // TODO: Add support for excluding protocols? Review comment: nit: Looks like this TODO has already been implemented as welll ########## File path: standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java ########## @@ -1496,6 +1503,23 @@ public static ConfVars getMetaConf(String name) { USERS_IN_ADMIN_ROLE("metastore.users.in.admin.role", "hive.users.in.admin.role", "", false, "Comma separated list of users who are in admin role for bootstrapping.\n" + "More users can be added in ADMIN role later."), + // TODO: Should we have a separate config for the metastoreclient or THRIFT_TRANSPORT_MODE + // would suffice ? 
##########
File path: standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
##########
@@ -1496,6 +1503,23 @@ public static ConfVars getMetaConf(String name) {
     USERS_IN_ADMIN_ROLE("metastore.users.in.admin.role", "hive.users.in.admin.role", "", false,
         "Comma separated list of users who are in admin role for bootstrapping.\n" +
             "More users can be added in ADMIN role later."),
+    // TODO: Should we have a separate config for the metastoreclient or THRIFT_TRANSPORT_MODE
+    // would suffice ?
+    METASTORE_CLIENT_THRIFT_TRANSPORT_MODE("metastore.client.thrift.transport.mode",
+        "hive.metastore.client.thrift.transport.mode", "binary",
+        "Transport mode to be used by the metastore client. It should be the same as " + THRIFT_TRANSPORT_MODE),
+    METASTORE_CLIENT_THRIFT_HTTP_PATH("metastore.client.thrift.http.path",

Review comment:
       Should this not be in HiveConf.java instead of MetastoreConf.java? It's my understanding that this is used by HMS clients like HS2, so it should be set in the client-side config?

##########
File path: standalone-metastore/metastore-common/src/main/java/org/apache/hadoop/hive/metastore/conf/MetastoreConf.java
##########
@@ -1496,6 +1503,23 @@ public static ConfVars getMetaConf(String name) {
     USERS_IN_ADMIN_ROLE("metastore.users.in.admin.role", "hive.users.in.admin.role", "", false,
         "Comma separated list of users who are in admin role for bootstrapping.\n" +
             "More users can be added in ADMIN role later."),
+    // TODO: Should we have a separate config for the metastoreclient or THRIFT_TRANSPORT_MODE
+    // would suffice ?
+    METASTORE_CLIENT_THRIFT_TRANSPORT_MODE("metastore.client.thrift.transport.mode",

Review comment:
       Should this not be in HiveConf.java instead of MetastoreConf.java? It's my understanding that this is used by HMS clients like HS2, so it should be set in the client-side config?

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


Issue Time Tracking
-------------------

    Worklog Id:     (was: 748963)
    Time Spent: 2h  (was: 1h 50m)

> Hive Metastore Thrift over HTTP
> -------------------------------
>
>                 Key: HIVE-21456
>                 URL: https://issues.apache.org/jira/browse/HIVE-21456
>             Project: Hive
>          Issue Type: New Feature
>          Components: Metastore, Standalone Metastore
>            Reporter: Amit Khanna
>            Assignee: Sourabh Goyal
>            Priority: Major
>              Labels: pull-request-available
>         Attachments: HIVE-21456.2.patch, HIVE-21456.3.patch, HIVE-21456.4.patch, HIVE-21456.patch
>
>          Time Spent: 2h
>  Remaining Estimate: 0h
>
> Hive Metastore currently doesn't have support for HTTP transport, because of
> which it is not possible to access it via Knox. Adding support for Thrift
> over HTTP transport will allow the clients to access it via Knox.

--
This message was sent by Atlassian Jira
(v8.20.1#820001)