[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102393352 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/AbstractServerConnection.java --- @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc; + +import io.netty.channel.socket.SocketChannel; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.hadoop.security.HadoopKerberosName; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; + +import javax.security.auth.login.LoginException; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import java.io.IOException; + +public abstract class AbstractServerConnection --- End diff -- Fixed all the warnings, thanks for the catch. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102393799 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/ServerAuthenticationHandler.java --- @@ -0,0 +1,269 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; +import com.google.protobuf.Internal.EnumLite; +import com.google.protobuf.InvalidProtocolBufferException; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufInputStream; +import org.apache.drill.exec.proto.UserBitShared.SaslMessage; +import org.apache.drill.exec.proto.UserBitShared.SaslStatus; +import org.apache.drill.exec.rpc.RequestHandler; +import org.apache.drill.exec.rpc.Response; +import org.apache.drill.exec.rpc.ResponseSender; +import org.apache.drill.exec.rpc.RpcException; +import org.apache.drill.exec.rpc.ServerConnection; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; +import java.util.EnumMap; +import java.util.Map; + +import static com.google.common.base.Preconditions.checkNotNull; + +public class ServerAuthenticationHandler implements RequestHandler { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(ServerAuthenticationHandler.class); + + private static final ImmutableMap RESPONSE_PROCESSORS; + + static { +final Map map = new EnumMap<>(SaslStatus.class); +map.put(SaslStatus.SASL_START, new SaslStartProcessor()); +map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor()); +map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor()); +map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor()); +RESPONSE_PROCESSORS = Maps.immutableEnumMap(map); + } + + private final RequestHandler requestHandler; + private final int saslRequestTypeValue; + private final T saslResponseType; + + public ServerAuthenticationHandler(final RequestHandler requestHandler, final int saslRequestTypeValue, + final T saslResponseType) { +this.requestHandler = requestHandler; +this.saslRequestTypeValue = saslRequestTypeValue; +this.saslResponseType = saslResponseType; + } + + @Override + public void handle(C connection, int rpcType, ByteBuf pBody, ByteBuf dBody, ResponseSender sender) + throws RpcException { +final String remoteAddress = connection.getRemoteAddress().toString(); + +// exchange involves server "challenges" and client "responses" (initiated by client) +if (saslRequestTypeValue == rpcType) { + final SaslMessage saslResponse; + try { +saslResponse = SaslMessage.PARSER.parseFrom(new ByteBufInputStream(pBody)); + } catch (final InvalidProtocolBufferException e) { +handleAuthFailure(connection, remoteAddress, sender, e, saslResponseType); +return; + } + + logger.trace("Received SASL message {} from {}", saslResponse.getStatus(), remoteAddress); + final SaslResponseProcessor processor = RESPONSE_PROCESSORS.get(saslResponse.getStatus()); + if (processor == null) { +logger.info("Unknown message type from client from {}. Will stop authentication.", remoteAddress); +handleAuthFailure(connection, remoteAddress, sender, new SaslException("Received unexpected message"), +saslResponseType); +return; + } + + final SaslResponseContext context = new SaslResponseContext<>(saslResponse, connection, r
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102395784 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java --- @@ -88,22 +129,183 @@ public void submitQuery(UserResultsListener resultsListener, RunQuery query) { send(queryResultHandler.getWrappedListener(resultsListener), RpcType.RUN_QUERY, query, QueryId.class); } - public void connect(RpcConnectionHandler handler, DrillbitEndpoint endpoint, - UserProperties props, UserBitShared.UserCredentials credentials) { + public CheckedFuture connect(DrillbitEndpoint endpoint, DrillProperties parameters, + UserCredentials credentials) { +final FutureHandler handler = new FutureHandler(); UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder() .setRpcVersion(UserRpcConfig.RPC_VERSION) .setSupportListening(true) .setSupportComplexTypes(supportComplexTypes) .setSupportTimeout(true) .setCredentials(credentials) -.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)); +.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)) +.setSaslSupport(SaslSupport.SASL_AUTH) +.setProperties(parameters.serializeForServer()); +this.properties = parameters; + + connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler), +hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort()); +return handler; + } + + /** + * Check (after {@link #connect connecting}) if server requires authentication. + * + * @return true if server requires authentication + */ + public boolean serverRequiresAuthentication() { +return supportedAuthMechs != null; + } + + /** + * Returns a list of supported authentication mechanism. If called before {@link #connect connecting}, + * returns null. If called after {@link #connect connecting}, returns a list of supported mechanisms + * iff authentication is required. + * + * @return list of supported authentication mechanisms + */ + public List getSupportedAuthenticationMechanisms() { --- End diff -- Applications (maybe in future) can choose a mechanism. Example: ``` userClient.connect(props); list = userClient.getSupportedAuthenticationMechanisms(); // pick one from 'list', maybe through callback to user userClient.authenticate(props2); ``` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102396422 --- Diff: exec/rpc/src/main/java/org/apache/drill/exec/rpc/ClientConnection.java --- @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc; + +import javax.security.sasl.SaslClient; + +public interface ClientConnection extends RemoteConnection { --- End diff -- Same comment as one of the above (saslClient will be used for future messages on the connection). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102391399 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlConnectionConfig.java --- @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.control; + +import org.apache.drill.exec.exception.DrillbitStartupException; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint; +import org.apache.drill.exec.rpc.BitConnectionConfig; +import org.apache.drill.exec.server.BootStrapContext; +import org.apache.drill.exec.work.batch.ControlMessageHandler; + +// package private +class ControlConnectionConfig extends BitConnectionConfig { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ControlConnectionConfig.class); + + private final ControlMessageHandler handler; + + private DrillbitEndpoint localEndpoint; + + ControlConnectionConfig(BufferAllocator allocator, BootStrapContext context, ControlMessageHandler handler) + throws DrillbitStartupException { +super(allocator, context); +this.handler = handler; + } + + @Override + public String getName() { +return "control"; // unused + } + + ControlMessageHandler getMessageHandler() { +return handler; + } + + void setLocalEndpoint(DrillbitEndpoint endpoint) { --- End diff -- Hmm what was the previous approach? I only moved this code from another class. Immutability would be nice, but I am not sure what the change entails (maybe a little involved). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102389973 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java --- @@ -89,14 +90,36 @@ public MessageLite getResponseDefaultInstance(int rpcType) throws RpcException { } @Override - protected Response handle(ControlConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody) throws RpcException { -return handler.handle(connection, rpcType, pBody, dBody); + protected void handle(ControlConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody, +ResponseSender sender) throws RpcException { +connection.getCurrentHandler().handle(connection, rpcType, pBody, dBody, sender); } @Override protected void validateHandshake(BitControlHandshake handshake) throws RpcException { if (handshake.getRpcVersion() != ControlRpcConfig.RPC_VERSION) { - throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION)); + throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", + handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION)); +} + +if (handshake.getAuthenticationMechanismsCount() != 0) { // remote requires authentication + final SaslClient saslClient; --- End diff -- The saslClient is instantiated differently in user->bit comm. and bit->bit comm. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102396712 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java --- @@ -105,8 +128,83 @@ protected void finalizeConnection(BitControlHandshake handshake, ControlConnecti connection.setEndpoint(handshake.getEndpoint()); } - public ControlConnection getConnection() { -return this.connection; + @Override + protected RpcCommand + getInitialCommand(final RpcCommand command) { +if (config.getAuthMechanismToUse() == null) { + return super.getInitialCommand(command); +} else { + return new AuthenticationCommand<>(command); --- End diff -- Thanks for the catch. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102394939 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/plain/PlainFactory.java --- @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security.plain; + +import org.apache.drill.common.config.DrillProperties; +import org.apache.drill.exec.rpc.security.AuthenticatorFactory; +import org.apache.drill.exec.rpc.security.FastSaslServerFactory; +import org.apache.drill.exec.rpc.security.FastSaslClientFactory; +import org.apache.drill.exec.rpc.user.security.UserAuthenticationException; +import org.apache.drill.exec.rpc.user.security.UserAuthenticator; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.auth.login.LoginException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import java.io.IOException; +import java.security.Security; +import java.util.Map; + +public class PlainFactory implements AuthenticatorFactory { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PlainFactory.class); + + public static final String SIMPLE_NAME = PlainServer.MECHANISM_NAME; + + static { +Security.addProvider(new PlainServer.PlainServerProvider()); + } + + private final UserAuthenticator authenticator; + + public PlainFactory() { +this.authenticator = null; + } + + public PlainFactory(final UserAuthenticator authenticator) { +this.authenticator = authenticator; + } + + @Override + public String getSimpleName() { +return SIMPLE_NAME; + } + + @Override + public UserGroupInformation createAndLoginUser(Map properties) throws IOException { +final Configuration conf = new Configuration(); +UserGroupInformation.setConfiguration(conf); +try { + return UserGroupInformation.getCurrentUser(); +} catch (final IOException e) { + logger.debug("Login failed.", e); + final Throwable cause = e.getCause(); + if (cause instanceof LoginException) { +throw new SaslException("Failed to login.", cause); + } + throw new SaslException("Unexpected failure trying to login. ", cause); +} + } + + @Override + public SaslServer createSaslServer(final UserGroupInformation ugi, final Map properties) + throws SaslException { +return FastSaslServerFactory.getInstance().createSaslServer(SIMPLE_NAME, null /** protocol */, +null /** serverName */, properties, new PlainServerCallbackHandler()); + } + + @Override + public SaslClient createSaslClient(final UserGroupInformation ugi, final Map properties) + throws SaslException { +final String userName = (String) properties.get(DrillProperties.USER); +final String password = (String) properties.get(DrillProperties.PASSWORD); + +return FastSaslClientFactory.getInstance().createSaslClient(new String[]{SIMPLE_NAME}, +null /** authorization ID */, null, null, properties, new CallbackHandler() { + @Override + public void handle(final Callback[] callbacks) throws IOException, UnsupportedCallbackException { +for (final Callback callback : callbacks) { + if (callback instanceof NameCallback) { +NameCallback.class.cast(callback).setName(userName); +
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102396769 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/DataClient.java --- @@ -75,27 +87,118 @@ public MessageLite getResponseDefaultInstance(int rpcType) throws RpcException { } @Override - protected Response handle(DataClientConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody) throws RpcException { + protected void handle(DataClientConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody, +ResponseSender sender) throws RpcException { throw new UnsupportedOperationException("DataClient is unidirectional by design."); } BufferAllocator getAllocator() { -return allocator; +return config.getAllocator(); } @Override protected void validateHandshake(BitServerHandshake handshake) throws RpcException { if (handshake.getRpcVersion() != DataRpcConfig.RPC_VERSION) { - throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION)); + throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", + handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION)); +} + +if (handshake.getAuthenticationMechanismsCount() != 0) { // remote requires authentication + final SaslClient saslClient; + try { +saslClient = config.getAuthFactory(handshake.getAuthenticationMechanismsList()) +.createSaslClient(UserGroupInformation.getLoginUser(), +config.getSaslClientProperties(remoteEndpoint)); + } catch (final IOException e) { +throw new RpcException(String.format("Failed to initiate authenticate to %s", remoteEndpoint.getAddress()), e); + } + if (saslClient == null) { +throw new RpcException("Unexpected failure. Could not initiate SASL exchange."); + } + connection.setSaslClient(saslClient); +} else { + if (config.getAuthMechanismToUse() != null) { +throw new RpcException(String.format("Drillbit (%s) does not require auth, but auth is enabled.", +remoteEndpoint.getAddress())); + } } } @Override - protected void finalizeConnection(BitServerHandshake handshake, DataClientConnection connection) { + protected RpcCommand + getInitialCommand(final RpcCommand command) { +if (config.getAuthMechanismToUse() == null) { + return super.getInitialCommand(command); +} else { + return new AuthenticationCommand<>(command); --- End diff -- Yes. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102396526 --- Diff: exec/rpc/src/main/java/org/apache/drill/exec/rpc/ServerConnection.java --- @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc; + +import javax.security.sasl.SaslServer; +import java.io.IOException; + +public interface ServerConnection extends RemoteConnection { + + // init only once + void initSaslServer(String mechanismName) throws IOException; --- End diff -- Same comment as one of the above (saslServer will be used for future messages on the connection). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102391859 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/AuthenticationOutcomeListener.java --- @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; +import com.google.protobuf.Internal.EnumLite; +import com.google.protobuf.MessageLite; +import io.netty.buffer.ByteBuf; +import org.apache.drill.exec.proto.UserBitShared.SaslMessage; +import org.apache.drill.exec.proto.UserBitShared.SaslStatus; +import org.apache.drill.exec.rpc.BasicClient; +import org.apache.drill.exec.rpc.ClientConnection; +import org.apache.drill.exec.rpc.RpcException; +import org.apache.drill.exec.rpc.RpcOutcomeListener; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; +import java.util.EnumMap; +import java.util.Map; + +import static com.google.common.base.Preconditions.checkNotNull; + +public class AuthenticationOutcomeListener +implements RpcOutcomeListener { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(AuthenticationOutcomeListener.class); + + private static final ImmutableMap CHALLENGE_PROCESSORS; + static { +final Map map = new EnumMap<>(SaslStatus.class); +map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor()); +map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor()); +map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor()); +CHALLENGE_PROCESSORS = Maps.immutableEnumMap(map); + } + + private final BasicClient client; + private final R connection; + private final T saslRpcType; + private final UserGroupInformation ugi; + private final RpcOutcomeListener rpcOutcomeListener; + + public AuthenticationOutcomeListener(BasicClient client, + R connection, T saslRpcType, UserGroupInformation ugi, + RpcOutcomeListener rpcOutcomeListener) { +this.client = client; +this.connection = connection; +this.saslRpcType = saslRpcType; +this.ugi = ugi; +this.rpcOutcomeListener = rpcOutcomeListener; + } + + public void initiate(final String mechanismName) { +logger.trace("Initiating SASL exchange."); +try { + final ByteString responseData; + final SaslClient saslClient = connection.getSaslClient(); + if (saslClient.hasInitialResponse()) { +responseData = ByteString.copyFrom(evaluateChallenge(ugi, saslClient, new byte[0])); + } else { +responseData = ByteString.EMPTY; + } + client.send(new AuthenticationOutcomeListener<>(client, connection, saslRpcType, ugi, rpcOutcomeListener), + connection, + saslRpcType, + SaslMessage.newBuilder() + .setMechanism(mechanismName) + .setStatus(SaslStatus.SASL_START) + .setData(responseData) + .build(), + SaslMessage.class, + true /** the connection will not be backed up at this point */); + logger.trace("Initiated SASL exchange."); +} catch (final Exception e) { + rpcOutcomeListener.failed(RpcException.mapException(e)); +} + } + + @Override + public void failed(RpcException ex) { +rpcOutcomeListener.failed(RpcException.mapException(ex)); + } + + @Override + public void success(SaslMessage value, By
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user sudheeshkatkam commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102390094 --- Diff: common/src/main/java/org/apache/drill/common/KerberosUtil.java --- @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.common; + +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +public final class KerberosUtil { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(KerberosUtil.class); + + public static final String KERBEROS_SASL_NAME = "GSSAPI"; + + public static final String KERBEROS_SIMPLE_NAME = "KERBEROS"; --- End diff -- + For a user, _auth=KERBEROS_ seems easier than _auth=GSSAPI_, and so I thought why not for admin as well (just like other Hadoop projects). + [This link](http://docs.oracle.com/javase/jndi/tutorial/ldap/security/gssapi.html) says _GSS-API SASL mechanism was retrofitted to mean only Kerberos v5._ --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #754: DRILL-5275: Sort spill is slow due to repeated allo...
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/754#discussion_r102382826 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/cache/VectorAccessibleSerializable.java --- @@ -57,6 +57,12 @@ private BatchSchema.SelectionVectorMode svMode = BatchSchema.SelectionVectorMode.NONE; private SelectionVector2 sv2; + /** + * Disk I/O buffer used for all reads and writes of DrillBufs. + */ + + private byte buffer[] = new byte[32*1024]; --- End diff -- The key issues with the location of the buffer are: * Want to reuse the buffer as much as possible. (Hence, putting it on the operator is a good idea.) * Want to keep interfaces simple (passing the buffer from the operator to everything that needs it is awkward.) * Can only be shared by a single thread, obviously. After playing around, it turns out we can move the read & write methods onto the {{BufferAllocator}} class. This makes them available to anything that uses Drillbufs. And, it allows the allocator to hold on to the (shared) I/O buffer. Reflecting on the sort, it becomes clear that such a change is necessary. In the merge phase, the sort will have many spill runs open; each will have its own {{VectorAccessibleSerializable}}, each with its own buffer. Moving the buffer to the allocator reduces the needs to a single, shared, buffer. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles
Github user ppadma commented on a diff in the pull request: https://github.com/apache/drill/pull/747#discussion_r102378871 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java --- @@ -178,6 +182,19 @@ public Foreman(final WorkerBee bee, final DrillbitContext drillbitContext, final QueryState initialState = queuingEnabled ? QueryState.ENQUEUED : QueryState.STARTING; recordNewState(initialState); enqueuedQueries.inc(); + +profileOption = setProfileOption(queryContext.getOptions()); + } + + private ProfileOption setProfileOption(OptionManager options) { +if (! options.getOption(ExecConstants.ENABLE_QUERY_PROFILE_VALIDATOR)) { --- End diff -- nit: extra space after ! --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles
Github user ppadma commented on a diff in the pull request: https://github.com/apache/drill/pull/747#discussion_r102378651 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java --- @@ -413,4 +413,15 @@ String DYNAMIC_UDF_SUPPORT_ENABLED = "exec.udf.enable_dynamic_support"; BooleanValidator DYNAMIC_UDF_SUPPORT_ENABLED_VALIDATOR = new BooleanValidator(DYNAMIC_UDF_SUPPORT_ENABLED, true, true); + + /** + * Option to save query profiles. If false, no query profile will be saved + * for any query. + */ + String ENABLE_QUERY_PROFILE_OPTION = "exec.query_profile.enable"; --- End diff -- After reading your comment, it feels like exec.query_profile.save would be a better choice. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #754: DRILL-5275: Sort spill is slow due to repeated allo...
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/754#discussion_r102378905 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/cache/VectorAccessibleSerializable.java --- @@ -57,6 +57,12 @@ private BatchSchema.SelectionVectorMode svMode = BatchSchema.SelectionVectorMode.NONE; private SelectionVector2 sv2; + /** + * Disk I/O buffer used for all reads and writes of DrillBufs. + */ + + private byte buffer[] = new byte[32*1024]; --- End diff -- Read a 18 GB file on disk using just a test program that uses various size buffers. {code} 32K buffer: Rate: 799 MB/s 64K buffer: Rate: 766 MB/s {code} So, seems no advantage of a larger buffer. (Tests with a smaller buffer do slow things down, hence the 32K size.) On direct memory: can't use direct memory as the fundamental problem is that data is in a direct memory DrillBuf, and must be copied to heap memory for writing. The original code does the copy by allocating a heap buffer the same size as the vector (16 MB, 32 MB or larger.) This code does the copying by reusing the same buffer over and over. No need to hold the buffer on the operator. This class is used for an entire spill/read session. What may be an issue, however, is the merge phase of a sort when many files are open and so many buffers are created. The reads are synchronous, so they could share a buffer. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/747#discussion_r102378009 --- Diff: exec/java-exec/src/main/resources/drill-module.conf --- @@ -162,6 +162,9 @@ drill.exec: { size: 1 } }, + foreman : { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/747#discussion_r102377974 --- Diff: exec/java-exec/src/main/resources/drill-module.conf --- @@ -162,6 +162,9 @@ drill.exec: { size: 1 } }, + foreman : { --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/747#discussion_r102377957 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/work/foreman/Foreman.java --- @@ -853,7 +875,9 @@ public void close() throws Exception { // storage write; query completion occurs in parallel with profile // persistence. - queryManager.writeFinalProfile(uex); + if (profileOption == ProfileOption.ASYNC) { +writeProfile(uex); --- End diff -- I dislike redundant code, but sure, it's just one line, I can paste it twice. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #747: DRILL-5257: Run-time control of query profiles
Github user paul-rogers commented on a diff in the pull request: https://github.com/apache/drill/pull/747#discussion_r10236 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/ExecConstants.java --- @@ -413,4 +413,19 @@ String DYNAMIC_UDF_SUPPORT_ENABLED = "exec.udf.enable_dynamic_support"; BooleanValidator DYNAMIC_UDF_SUPPORT_ENABLED_VALIDATOR = new BooleanValidator(DYNAMIC_UDF_SUPPORT_ENABLED, true, true); + + /** + * Option to save query profiles. + * + * async (default): Write query profile after last response + * to the client. + * sync: Write the query profile before the last response to + * the client. Very useful for testing to avoid race conditions. + * none: Don't write the query profile at all. Useful when running + * many production jobs that do not need to be reviewed. + * + */ + String QUERY_PROFILE_OPTION = "exec.profile"; --- End diff -- Revised to be two boolean flags. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #750: DRILL-5273: CompliantTextReader excessive memory us...
Github user chunhui-shi commented on a diff in the pull request: https://github.com/apache/drill/pull/750#discussion_r102377194 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/easy/text/compliant/CompliantTextRecordReader.java --- @@ -118,12 +118,21 @@ public boolean apply(@Nullable SchemaPath path) { * @param outputMutator Used to create the schema in the output record batch * @throws ExecutionSetupException */ + @SuppressWarnings("resource") @Override public void setup(OperatorContext context, OutputMutator outputMutator) throws ExecutionSetupException { oContext = context; -readBuffer = context.getManagedBuffer(READ_BUFFER); -whitespaceBuffer = context.getManagedBuffer(WHITE_SPACE_BUFFER); +// Note: DO NOT use managed buffers here. They remain in existence +// until the fragment is shut down. The buffers here are large. --- End diff -- I think the reason you chose to use context.getAllocator() was you don't want to fragmentize managed buffer? Otherwise you might just call readBuffer.close()? Was there any problem with managed buffer's release? Just curious about the "DO NOT use managed buffer here" part. Besides that, +1. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on the issue: https://github.com/apache/drill/pull/729 I have addressed the comments from the earlier pull request. Please take a look --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #755: DRILL-5270: Improve loading of profiles listing in the Web...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/755 For 8266 profiles, when measured from Chrome browser's Network tool: ``` Load First Time: 2.43s Load Second Time (no new profiles): 829ms ``` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #613: DRILL-4730: Update JDBC DatabaseMetaData implementa...
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/613#discussion_r102364337 --- Diff: exec/jdbc/src/main/java/org/apache/drill/jdbc/impl/DrillMetaImpl.java --- @@ -78,19 +111,237 @@ private MetaResultSet s(String s) { } } + /** Information about type mapping. */ + private static class TypeInfo { +private static Map, TypeInfo> MAPPING = ImmutableMap., TypeInfo> builder() +.put(boolean.class, of(Types.BOOLEAN, "BOOLEAN")) +.put(Boolean.class, of(Types.BOOLEAN, "BOOLEAN")) +.put(Byte.TYPE, of(Types.TINYINT, "TINYINT")) +.put(Byte.class, of(Types.TINYINT, "TINYINT")) +.put(Short.TYPE, of(Types.SMALLINT, "SMALLINT")) +.put(Short.class, of(Types.SMALLINT, "SMALLINT")) +.put(Integer.TYPE, of(Types.INTEGER, "INTEGER")) +.put(Integer.class, of(Types.INTEGER, "INTEGER")) +.put(Long.TYPE, of(Types.BIGINT, "BIGINT")) +.put(Long.class, of(Types.BIGINT, "BIGINT")) +.put(Float.TYPE, of(Types.FLOAT, "FLOAT")) +.put(Float.class, of(Types.FLOAT, "FLOAT")) +.put(Double.TYPE, of(Types.DOUBLE, "DOUBLE")) +.put(Double.class, of(Types.DOUBLE, "DOUBLE")) +.put(String.class, of(Types.VARCHAR, "CHARACTER VARYING")) +.put(java.sql.Date.class, of(Types.DATE, "DATE")) +.put(Time.class, of(Types.TIME, "TIME")) +.put(Timestamp.class, of(Types.TIMESTAMP, "TIMESTAMP")) +.build(); + +private final int sqlType; +private final String sqlTypeName; + +public TypeInfo(int sqlType, String sqlTypeName) { + this.sqlType = sqlType; + this.sqlTypeName = sqlTypeName; +} +private static TypeInfo of(int sqlType, String sqlTypeName) { + return new TypeInfo(sqlType, sqlTypeName); +} - @Override - protected MetaResultSet createEmptyResultSet(Class clazz) { -return s( -"SELECT '' AS `Interim zero-row result set` " // dummy row type -+ "FROM INFORMATION_SCHEMA.CATALOGS " // any table -+ "LIMIT 0"// zero rows -); +public static TypeInfo get(Class clazz) { + return MAPPING.get(clazz); +} } - @Override - public MetaResultSet getTables(String catalog, final Pat schemaPattern, final Pat tableNamePattern, + /** Metadata describing a column. + * Copied from Avatica with several fixes + * */ + public static class MetaColumn implements Named { +public final String tableCat; +public final String tableSchem; +public final String tableName; +public final String columnName; +public final int dataType; +public final String typeName; +public final Integer columnSize; +public final Integer bufferLength = null; +public final Integer decimalDigits; +public final Integer numPrecRadix; +public final int nullable; +public final String remarks = null; +public final String columnDef = null; +public final Integer sqlDataType = null; +public final Integer sqlDatetimeSub = null; +public final Integer charOctetLength; +public final int ordinalPosition; +@NotNull +public final String isNullable; +public final String scopeCatalog = null; +public final String scopeSchema = null; +public final String scopeTable = null; +public final Short sourceDataType = null; +@NotNull +public final String isAutoincrement = ""; +@NotNull +public final String isGeneratedcolumn = ""; + +public MetaColumn( +String tableCat, +String tableSchem, +String tableName, +String columnName, +int dataType, +String typeName, +Integer columnSize, +Integer decimalDigits, +Integer numPrecRadix, +int nullable, +Integer charOctetLength, +int ordinalPosition, +String isNullable) { + this.tableCat = tableCat; + this.tableSchem = tableSchem; + this.tableName = tableName; + this.columnName = columnName; + this.dataType = dataType; + this.typeName = typeName; + this.columnSize = columnSize; + this.decimalDigits = decimalDigits; + this.numPrecRadix = numPrecRadix; + this.nullable = nullable; + this.charOctetLength = charOctetLength; + this.ordinalPosition = ordinalPosition; + this.isNullable = isNullable; +} + +@Override +public String getName() { + return columnName; +} + } +
[GitHub] drill issue #755: DRILL-5270: Improve loading of profiles listing in the Web...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/755 A summary of the performance is available in this [comment](https://issues.apache.org/jira/browse/DRILL-5270?focusedCommentId=15877119&page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#comment-15877119) on the JIRA (DRILL-5270) --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #613: DRILL-4730: Update JDBC DatabaseMetaData implementa...
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/613#discussion_r102363803 --- Diff: exec/jdbc/src/main/java/org/apache/drill/jdbc/impl/DrillCursor.java --- @@ -24,33 +24,260 @@ import java.util.ArrayList; import java.util.Calendar; import java.util.List; +import java.util.concurrent.CountDownLatch; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; import org.apache.calcite.avatica.AvaticaResultSet; +import org.apache.calcite.avatica.AvaticaStatement; import org.apache.calcite.avatica.ColumnMetaData; +import org.apache.calcite.avatica.Meta; +import org.apache.calcite.avatica.Meta.Signature; import org.apache.calcite.avatica.util.ArrayImpl.Factory; import org.apache.calcite.avatica.util.Cursor; import org.apache.drill.common.exceptions.UserException; +import org.apache.drill.exec.ExecConstants; +import org.apache.drill.exec.client.DrillClient; import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.proto.UserBitShared.QueryId; +import org.apache.drill.exec.proto.UserBitShared.QueryResult.QueryState; +import org.apache.drill.exec.proto.UserBitShared.QueryType; +import org.apache.drill.exec.proto.helper.QueryIdHelper; import org.apache.drill.exec.record.BatchSchema; import org.apache.drill.exec.record.RecordBatchLoader; +import org.apache.drill.exec.rpc.ConnectionThrottle; import org.apache.drill.exec.rpc.user.QueryDataBatch; +import org.apache.drill.exec.rpc.user.UserResultsListener; import org.apache.drill.exec.store.ischema.InfoSchemaConstants; +import org.apache.drill.jdbc.SchemaChangeListener; import org.slf4j.Logger; +import com.google.common.collect.Queues; + class DrillCursor implements Cursor { + + + // ResultsListener: + static class ResultsListener implements UserResultsListener { +private static final org.slf4j.Logger logger = +org.slf4j.LoggerFactory.getLogger(ResultsListener.class); + +private static volatile int nextInstanceId = 1; + +/** (Just for logging.) */ +private final int instanceId; + +private final int batchQueueThrottlingThreshold; + +/** (Just for logging.) */ +private volatile QueryId queryId; + +/** (Just for logging.) */ +private int lastReceivedBatchNumber; +/** (Just for logging.) */ +private int lastDequeuedBatchNumber; + +private volatile UserException executionFailureException; + +// TODO: Revisit "completed". Determine and document exactly what it +// means. Some uses imply that it means that incoming messages indicate +// that the _query_ has _terminated_ (not necessarily _completing_ +// normally), while some uses imply that it's some other state of the +// ResultListener. Some uses seem redundant.) +volatile boolean completed = false; + +/** Whether throttling of incoming data is active. */ +private final AtomicBoolean throttled = new AtomicBoolean( false ); +private volatile ConnectionThrottle throttle; + +private volatile boolean closed = false; + +private final CountDownLatch firstMessageReceived = new CountDownLatch(1); + +final LinkedBlockingDeque batchQueue = +Queues.newLinkedBlockingDeque(); + + +/** + * ... + * @param batchQueueThrottlingThreshold + * queue size threshold for throttling server + */ +ResultsListener( int batchQueueThrottlingThreshold ) { + instanceId = nextInstanceId++; + this.batchQueueThrottlingThreshold = batchQueueThrottlingThreshold; + logger.debug( "[#{}] Query listener created.", instanceId ); +} + +/** + * Starts throttling if not currently throttling. + * @param throttle the "throttlable" object to throttle + * @return true if actually started (wasn't throttling already) + */ +private boolean startThrottlingIfNot( ConnectionThrottle throttle ) { + final boolean started = throttled.compareAndSet( false, true ); + if ( started ) { +this.throttle = throttle; +throttle.setAutoRead(false); + } + return started; +} + +/** + * Stops throttling if currently throttling. + * @return true if actually stopped (was throttling) + */ +private boolean stopThrottlingIfSo() { + final boolean stopped = throttled.compareAndSet( true, false ); + if ( stopped ) { +throttle.setAutoRead(true);
[GitHub] drill pull request #755: DRILL-5270: Improve loading of profiles listing in ...
GitHub user kkhatua opened a pull request: https://github.com/apache/drill/pull/755 DRILL-5270: Improve loading of profiles listing in the WebUI Using Hadoop API to filter and reduce profile list load time Using an in-memory treeSet-based cache, maintain the list of most recent profiles. You can merge this pull request into a Git repository by running: $ git pull https://github.com/kkhatua/drill DRILL-5270 Alternatively you can review and apply these changes as the patch at: https://github.com/apache/drill/pull/755.patch To close this pull request, make a commit to your master/trunk branch with (at least) the following in the commit message: This closes #755 commit a5f20643850ad399622e5df9a6f37713545dc7a6 Author: Kunal Khatua Date: 2017-02-22T01:20:48Z DRILL-5270: Improve loading of profiles listing in the WebUI Using Hadoop API to filter and reduce profile list load time Using an in-memory treeSet-based cache, maintain the list of most recent profiles. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102362622 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java --- @@ -109,7 +121,37 @@ public MockSchema(MockStorageEngine engine) { @Override public Table getTable(String name) { - Pattern p = Pattern.compile("(\\w+)_(\\d+)(k|m)?", Pattern.CASE_INSENSITIVE); + if (name.toLowerCase().endsWith(".json") ) { +return getConfigFile(name); + } else { +return getDirectTable(name); + } +} + +private Table getConfigFile(String name) { + final URL url = Resources.getResource(name); + if (url == null) { +throw new IllegalArgumentException( +"Unable to find mock table config file " + name); + } + MockTableDef mockTableDefn; + try { +String json = Resources.toString(url, Charsets.UTF_8); +final ObjectMapper mapper = new ObjectMapper(); +mapper.configure(JsonParser.Feature.ALLOW_UNQUOTED_FIELD_NAMES, true); +mockTableDefn = mapper.readValue(json, MockTableDef.class); + } catch (JsonParseException e) { +throw new IllegalArgumentException( "Unable to parse mock table definition file: " + name, e ); + } catch (JsonMappingException e) { +throw new IllegalArgumentException( "Unable to Jackson deserialize mock table definition file: " + name, e ); + } catch (IOException e) { +throw new IllegalArgumentException( "Unable to read mock table definition file: " + name, e ); + } + return new DynamicDrillTable(engine, this.name, mockTableDefn.getEntries() ); --- End diff -- Please remove extra space before last` )`. there are few other places too like line 169 in this file, line 199 (MockTableDef.java), etc. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102336171 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/package-info.java --- @@ -60,14 +62,26 @@ * The mode is one of the supported Drill * {@link DataMode} names: usually OPTIONAL or REQUIRED. * + * + * Recent extensions include: + * + * repeat in either the "entry" or "record" elements allow --- End diff -- I just found repeat definition in `MockColumn` but not in `MockScanEntry` whereas here in comment and example `example-mock.json` we are showing repeat property at entry level. Is this work in progress ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102320948 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockStorageEngine.java --- @@ -89,14 +85,30 @@ public boolean supportsRead() { return true; } -// public static class ImplicitTable extends DynamicDrillTable { -// -//public ImplicitTable(StoragePlugin plugin, String storageEngineName, -//Object selection) { -// super(plugin, storageEngineName, selection); -//} -// -// } + /** + * Resolves table names within the mock data source. Tables can be of two forms: + * + * _ + * + * Where the "name" can be anything, "n" is the number of rows, and "unit" is + * the units for the row count: non, K (thousand) or M (million). + * + * The above form generates a table directly with no other information needed. + * Column names must be provided, and must be of the form: + * + * _ + * + * Where the name can be anything, the type must be i (integer), d (double) + * or s (string, AKA VarChar). The length is needed only for string fields. --- End diff -- how about boolean (b) as a type ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102294277 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/BooleanGen.java --- @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.store.mock; + +import java.util.Random; + +import org.apache.drill.exec.vector.BitVector; +import org.apache.drill.exec.vector.ValueVector; + +public class BooleanGen implements FieldGen { + + Random rand = new Random( ); + + @Override + public void setup(ColumnDef colDef) { } + + public int value( ) { --- End diff -- Extra space between `()` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102298318 --- Diff: exec/java-exec/src/test/java/org/apache/drill/exec/fn/interp/ExpressionInterpreterTest.java --- @@ -124,7 +125,7 @@ public void interpreterDateTest() throws Exception { final BitControl.PlanFragment planFragment = BitControl.PlanFragment.getDefaultInstance(); final QueryContextInformation queryContextInfo = planFragment.getContext(); final inttimeZoneIndex = queryContextInfo.getTimeZone(); -final org.joda.time.DateTimeZone timeZone = org.joda.time.DateTimeZone.forID(org.apache.drill.exec.expr.fn.impl.DateUtility.getTimeZone(timeZoneIndex)); +final DateTimeZone timeZone = DateTimeZone.forID(org.apache.drill.exec.expr.fn.impl.DateUtility.getTimeZone(timeZoneIndex)); --- End diff -- Please remove extra space after `=` --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #752: DRILL-5258: Access mock data definition from SQL
Github user sohami commented on a diff in the pull request: https://github.com/apache/drill/pull/752#discussion_r102360734 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/store/mock/MockGroupScanPOP.java --- @@ -75,20 +76,50 @@ */ private boolean extended; + private ScanStats scanStats = ScanStats.TRIVIAL_TABLE; @JsonCreator public MockGroupScanPOP(@JsonProperty("url") String url, - @JsonProperty("extended") Boolean extended, @JsonProperty("entries") List readEntries) { super((String) null); this.readEntries = readEntries; this.url = url; -this.extended = extended == null ? false : extended; + +// Compute decent row-count stats for this mock data source so that +// the planner is "fooled" into thinking that this operator wil do +// disk I/O. + +int rowCount = 0; +int rowWidth = 0; +for (MockScanEntry entry : readEntries) { + rowCount += entry.getRecords(); + int width = 0; + if (entry.getTypes() == null) { +width = 50; + } else { +for (MockColumn col : entry.getTypes()) { + int colWidth = 0; + if (col.getWidthValue() == 0) { +colWidth = TypeHelper.getSize(col.getMajorType()); + } else { +colWidth = col.getWidthValue(); + } + colWidth *= col.getRepeatCount(); + width += colWidth; +} + } + rowWidth = Math.max(rowWidth, width); --- End diff -- `rowWidth` seems to be `maxRowWidth` and `width` is `rowWidth`. Can we please rename these ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #613: DRILL-4730: Update JDBC DatabaseMetaData implementa...
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/613#discussion_r102363018 --- Diff: exec/jdbc/src/test/java/org/apache/drill/jdbc/DatabaseMetaDataGetColumnsTest.java --- @@ -2712,22 +2712,18 @@ public void test_SOURCE_DATA_TYPE_hasSameNameAndLabel() throws SQLException { @Test public void test_SOURCE_DATA_TYPE_hasRightTypeString() throws SQLException { -// TODO(DRILL-2135): Resolve workaround: -//assertThat( rsMetadata.getColumnTypeName( 22 ), equalTo( "SMALLINT" ) ); -assertThat( rowsMetadata.getColumnTypeName( 22 ), equalTo( "INTEGER" ) ); +assertThat( rowsMetadata.getColumnTypeName( 22 ), equalTo( "SMALLINT" ) ); --- End diff -- no, it isn't unfortunately --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102304062 --- Diff: exec/rpc/src/main/java/org/apache/drill/exec/rpc/ClientConnection.java --- @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc; + +import javax.security.sasl.SaslClient; + +public interface ClientConnection extends RemoteConnection { --- End diff -- Is `ClientConnection` the right place to pass along the saslClient vs `AuthenticationCommand`? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102344639 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/ServerAuthenticationHandler.java --- @@ -0,0 +1,269 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; +import com.google.protobuf.Internal.EnumLite; +import com.google.protobuf.InvalidProtocolBufferException; +import io.netty.buffer.ByteBuf; +import io.netty.buffer.ByteBufInputStream; +import org.apache.drill.exec.proto.UserBitShared.SaslMessage; +import org.apache.drill.exec.proto.UserBitShared.SaslStatus; +import org.apache.drill.exec.rpc.RequestHandler; +import org.apache.drill.exec.rpc.Response; +import org.apache.drill.exec.rpc.ResponseSender; +import org.apache.drill.exec.rpc.RpcException; +import org.apache.drill.exec.rpc.ServerConnection; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; +import java.util.EnumMap; +import java.util.Map; + +import static com.google.common.base.Preconditions.checkNotNull; + +public class ServerAuthenticationHandler implements RequestHandler { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(ServerAuthenticationHandler.class); + + private static final ImmutableMap RESPONSE_PROCESSORS; + + static { +final Map map = new EnumMap<>(SaslStatus.class); +map.put(SaslStatus.SASL_START, new SaslStartProcessor()); +map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor()); +map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor()); +map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor()); +RESPONSE_PROCESSORS = Maps.immutableEnumMap(map); + } + + private final RequestHandler requestHandler; + private final int saslRequestTypeValue; + private final T saslResponseType; + + public ServerAuthenticationHandler(final RequestHandler requestHandler, final int saslRequestTypeValue, + final T saslResponseType) { +this.requestHandler = requestHandler; +this.saslRequestTypeValue = saslRequestTypeValue; +this.saslResponseType = saslResponseType; + } + + @Override + public void handle(C connection, int rpcType, ByteBuf pBody, ByteBuf dBody, ResponseSender sender) + throws RpcException { +final String remoteAddress = connection.getRemoteAddress().toString(); + +// exchange involves server "challenges" and client "responses" (initiated by client) +if (saslRequestTypeValue == rpcType) { + final SaslMessage saslResponse; + try { +saslResponse = SaslMessage.PARSER.parseFrom(new ByteBufInputStream(pBody)); + } catch (final InvalidProtocolBufferException e) { +handleAuthFailure(connection, remoteAddress, sender, e, saslResponseType); +return; + } + + logger.trace("Received SASL message {} from {}", saslResponse.getStatus(), remoteAddress); + final SaslResponseProcessor processor = RESPONSE_PROCESSORS.get(saslResponse.getStatus()); + if (processor == null) { +logger.info("Unknown message type from client from {}. Will stop authentication.", remoteAddress); +handleAuthFailure(connection, remoteAddress, sender, new SaslException("Received unexpected message"), +saslResponseType); +return; + } + + final SaslResponseContext context = new SaslResponseContext<>(saslResponse, connection, remote
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101601121 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +// loads all the available mechanism and factories in the sasl_lib referenced by the path +const int err = sasl_client_init(NULL); +if (0 != err) { +std::stringstream errMsg; +errMsg << "Failed to load authentication libraries. code: " << err; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;) +throw std::runtime_error(errMsg.str().c_str()); +} +{ // for debugging purposes +const char **mechanisms = sasl_global_listmech(); +int i = 0; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms available on client: " << std::endl;) +while (mechanisms[i] != NULL) { +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << mechanisms[i] << std::endl;) +i++; +} +} +s_initialized = true; +} +} +} + +SaslAuthenticatorImpl::~SaslAuthenticatorImpl() { +if (m_pwd_secret) { +free(m_pwd_secret); +} +// may be used to negotiated security layers before disposing in the future +if (m_pConnection) { +sasl_dispose(&m_pConnection); +} +m_pConnection = NULL; +} + +typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft + +int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const char **result, unsigned *len) { +const std::string* const username = static_cast(context); + +if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id) +&& username != NULL) { +*result = username->c_str(); +// *len = (unsigned int) username->length(); +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void *context, int id, sasl_secret_t **psecret) { +const SaslAuthenticatorImpl* const authenticator = static_cast(context); + +if (SASL_CB_PASS == id) { +*psecret = authenticator->m_pwd_secret; +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::init(const std::vector& mechanisms, exec::shared::SaslMessage& response) { +// find and set parameters +std::string authMechanismToUse; +std::string serviceName; +std::string serviceHost; +for (size_t i = 0; i < m_properties->size(); i++) { --- End diff --
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102330939 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/plain/PlainFactory.java --- @@ -0,0 +1,166 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security.plain; + +import org.apache.drill.common.config.DrillProperties; +import org.apache.drill.exec.rpc.security.AuthenticatorFactory; +import org.apache.drill.exec.rpc.security.FastSaslServerFactory; +import org.apache.drill.exec.rpc.security.FastSaslClientFactory; +import org.apache.drill.exec.rpc.user.security.UserAuthenticationException; +import org.apache.drill.exec.rpc.user.security.UserAuthenticator; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.security.auth.callback.Callback; +import javax.security.auth.callback.CallbackHandler; +import javax.security.auth.callback.NameCallback; +import javax.security.auth.callback.PasswordCallback; +import javax.security.auth.callback.UnsupportedCallbackException; +import javax.security.auth.login.LoginException; +import javax.security.sasl.AuthorizeCallback; +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import java.io.IOException; +import java.security.Security; +import java.util.Map; + +public class PlainFactory implements AuthenticatorFactory { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(PlainFactory.class); + + public static final String SIMPLE_NAME = PlainServer.MECHANISM_NAME; + + static { +Security.addProvider(new PlainServer.PlainServerProvider()); + } + + private final UserAuthenticator authenticator; + + public PlainFactory() { +this.authenticator = null; + } + + public PlainFactory(final UserAuthenticator authenticator) { +this.authenticator = authenticator; + } + + @Override + public String getSimpleName() { +return SIMPLE_NAME; + } + + @Override + public UserGroupInformation createAndLoginUser(Map properties) throws IOException { +final Configuration conf = new Configuration(); +UserGroupInformation.setConfiguration(conf); +try { + return UserGroupInformation.getCurrentUser(); +} catch (final IOException e) { + logger.debug("Login failed.", e); + final Throwable cause = e.getCause(); + if (cause instanceof LoginException) { +throw new SaslException("Failed to login.", cause); + } + throw new SaslException("Unexpected failure trying to login. ", cause); +} + } + + @Override + public SaslServer createSaslServer(final UserGroupInformation ugi, final Map properties) + throws SaslException { +return FastSaslServerFactory.getInstance().createSaslServer(SIMPLE_NAME, null /** protocol */, +null /** serverName */, properties, new PlainServerCallbackHandler()); + } + + @Override + public SaslClient createSaslClient(final UserGroupInformation ugi, final Map properties) + throws SaslException { +final String userName = (String) properties.get(DrillProperties.USER); +final String password = (String) properties.get(DrillProperties.PASSWORD); + +return FastSaslClientFactory.getInstance().createSaslClient(new String[]{SIMPLE_NAME}, +null /** authorization ID */, null, null, properties, new CallbackHandler() { + @Override + public void handle(final Callback[] callbacks) throws IOException, UnsupportedCallbackException { +for (final Callback callback : callbacks) { + if (callback instanceof NameCallback) { +NameCallback.class.cast(callback).setName(userName); +
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102295865 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/data/DataClient.java --- @@ -75,27 +87,118 @@ public MessageLite getResponseDefaultInstance(int rpcType) throws RpcException { } @Override - protected Response handle(DataClientConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody) throws RpcException { + protected void handle(DataClientConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody, +ResponseSender sender) throws RpcException { throw new UnsupportedOperationException("DataClient is unidirectional by design."); } BufferAllocator getAllocator() { -return allocator; +return config.getAllocator(); } @Override protected void validateHandshake(BitServerHandshake handshake) throws RpcException { if (handshake.getRpcVersion() != DataRpcConfig.RPC_VERSION) { - throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION)); + throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", + handshake.getRpcVersion(), DataRpcConfig.RPC_VERSION)); +} + +if (handshake.getAuthenticationMechanismsCount() != 0) { // remote requires authentication + final SaslClient saslClient; + try { +saslClient = config.getAuthFactory(handshake.getAuthenticationMechanismsList()) +.createSaslClient(UserGroupInformation.getLoginUser(), +config.getSaslClientProperties(remoteEndpoint)); + } catch (final IOException e) { +throw new RpcException(String.format("Failed to initiate authenticate to %s", remoteEndpoint.getAddress()), e); + } + if (saslClient == null) { +throw new RpcException("Unexpected failure. Could not initiate SASL exchange."); + } + connection.setSaslClient(saslClient); +} else { + if (config.getAuthMechanismToUse() != null) { +throw new RpcException(String.format("Drillbit (%s) does not require auth, but auth is enabled.", +remoteEndpoint.getAddress())); + } } } @Override - protected void finalizeConnection(BitServerHandshake handshake, DataClientConnection connection) { + protected RpcCommand + getInitialCommand(final RpcCommand command) { +if (config.getAuthMechanismToUse() == null) { + return super.getInitialCommand(command); +} else { + return new AuthenticationCommand<>(command); --- End diff -- shouldn't we use `super.getInitialCommand(command)` here too? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101598598 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +// loads all the available mechanism and factories in the sasl_lib referenced by the path +const int err = sasl_client_init(NULL); +if (0 != err) { +std::stringstream errMsg; +errMsg << "Failed to load authentication libraries. code: " << err; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;) +throw std::runtime_error(errMsg.str().c_str()); +} +{ // for debugging purposes +const char **mechanisms = sasl_global_listmech(); +int i = 0; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms available on client: " << std::endl;) +while (mechanisms[i] != NULL) { +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << mechanisms[i] << std::endl;) +i++; +} +} +s_initialized = true; +} +} +} + +SaslAuthenticatorImpl::~SaslAuthenticatorImpl() { +if (m_pwd_secret) { +free(m_pwd_secret); +} +// may be used to negotiated security layers before disposing in the future +if (m_pConnection) { +sasl_dispose(&m_pConnection); +} +m_pConnection = NULL; +} + +typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft + +int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const char **result, unsigned *len) { +const std::string* const username = static_cast(context); + +if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id) +&& username != NULL) { +*result = username->c_str(); +// *len = (unsigned int) username->length(); +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void *context, int id, sasl_secret_t **psecret) { +const SaslAuthenticatorImpl* const authenticator = static_cast(context); + +if (SASL_CB_PASS == id) { +*psecret = authenticator->m_pwd_secret; +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::init(const std::vector& mechanisms, exec::shared::SaslMessage& response) { +// find and set parameters +std::string authMechanismToUse; +std::string serviceName; +std::string serviceHost; +for (size_t i = 0; i < m_properties->size(); i++) { +const s
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101116414 --- Diff: common/src/main/java/org/apache/drill/common/map/CaseInsensitiveMap.java --- @@ -55,6 +55,18 @@ } /** + * Returns a new instance of {@link java.util.HashMap}, with key case-insensitivity, of expected size. --- End diff -- okay :) --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102347409 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java --- @@ -88,22 +124,178 @@ public void submitQuery(UserResultsListener resultsListener, RunQuery query) { send(queryResultHandler.getWrappedListener(resultsListener), RpcType.RUN_QUERY, query, QueryId.class); } - public void connect(RpcConnectionHandler handler, DrillbitEndpoint endpoint, - UserProperties props, UserBitShared.UserCredentials credentials) { + public CheckedFuture connect(DrillbitEndpoint endpoint, DrillProperties parameters, + UserCredentials credentials) { +final FutureHandler handler = new FutureHandler(); UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder() .setRpcVersion(UserRpcConfig.RPC_VERSION) .setSupportListening(true) .setSupportComplexTypes(supportComplexTypes) .setSupportTimeout(true) .setCredentials(credentials) -.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)); +.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)) +.setSaslSupport(SaslSupport.SASL_AUTH) +.setProperties(parameters.serializeForServer()); +this.properties = parameters; + + connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler), +hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort()); +return handler; + } -if (props != null) { - hsBuilder.setProperties(props); + /** + * Check (after {@link #connect connecting}) if server requires authentication. + * + * @return true if server requires authentication + */ + public boolean serverRequiresAuthentication() { +return serverAuthMechanisms != null; + } + + /** + * Returns a list of supported authentication mechanism. If called before {@link #connect connecting}, + * returns null. If called after {@link #connect connecting}, returns a list of supported mechanisms + * iff authentication is required. + * + * @return list of supported authentication mechanisms + */ + public List getSupportedAuthenticationMechanisms() { +return serverAuthMechanisms; + } + + /** + * Authenticate to the server asynchronously. Returns a future that {@link CheckedFuture#checkedGet results} + * in null if authentication succeeds, or throws a {@link SaslException} with relevant message if + * authentication fails. + * + * This method uses properties provided at {@link #connect connection time} and override them with the + * given properties, if any. + * + * @param overrides parameter overrides + * @return result of authentication request + */ + public CheckedFuture authenticate(final DrillProperties overrides) { --- End diff -- is there any need (other than for testing?) to not include authentication in the connection process? From my point of view, it should be included since the user already provided all the needed properties (overrides is NULL in DrillClient), and the user cannot do anything until authenticated anyway... --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102346707 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserClient.java --- @@ -88,22 +129,183 @@ public void submitQuery(UserResultsListener resultsListener, RunQuery query) { send(queryResultHandler.getWrappedListener(resultsListener), RpcType.RUN_QUERY, query, QueryId.class); } - public void connect(RpcConnectionHandler handler, DrillbitEndpoint endpoint, - UserProperties props, UserBitShared.UserCredentials credentials) { + public CheckedFuture connect(DrillbitEndpoint endpoint, DrillProperties parameters, + UserCredentials credentials) { +final FutureHandler handler = new FutureHandler(); UserToBitHandshake.Builder hsBuilder = UserToBitHandshake.newBuilder() .setRpcVersion(UserRpcConfig.RPC_VERSION) .setSupportListening(true) .setSupportComplexTypes(supportComplexTypes) .setSupportTimeout(true) .setCredentials(credentials) -.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)); +.setClientInfos(UserRpcUtils.getRpcEndpointInfos(clientName)) +.setSaslSupport(SaslSupport.SASL_AUTH) +.setProperties(parameters.serializeForServer()); +this.properties = parameters; + + connectAsClient(queryResultHandler.getWrappedConnectionHandler(handler), +hsBuilder.build(), endpoint.getAddress(), endpoint.getUserPort()); +return handler; + } + + /** + * Check (after {@link #connect connecting}) if server requires authentication. + * + * @return true if server requires authentication + */ + public boolean serverRequiresAuthentication() { +return supportedAuthMechs != null; + } + + /** + * Returns a list of supported authentication mechanism. If called before {@link #connect connecting}, + * returns null. If called after {@link #connect connecting}, returns a list of supported mechanisms + * iff authentication is required. + * + * @return list of supported authentication mechanisms + */ + public List getSupportedAuthenticationMechanisms() { --- End diff -- what's the use-case? debugging? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101121400 --- Diff: contrib/native/client/src/clientlib/drillClientImpl.cpp --- @@ -412,37 +427,155 @@ connectionStatus_t DrillClientImpl::validateHandshake(DrillUserProperties* prope if(ret!=CONN_SUCCESS){ return ret; } -if(this->m_handshakeStatus != exec::user::SUCCESS){ -switch(this->m_handshakeStatus){ -case exec::user::RPC_VERSION_MISMATCH: -DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Invalid rpc version. Expected " -<< DRILL_RPC_VERSION << ", actual "<< m_handshakeVersion << "." << std::endl;) -return handleConnError(CONN_BAD_RPC_VER, -getMessage(ERR_CONN_BAD_RPC_VER, DRILL_RPC_VERSION, -m_handshakeVersion, -this->m_handshakeErrorId.c_str(), -this->m_handshakeErrorMsg.c_str())); -case exec::user::AUTH_FAILED: -DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Authentication failed." << std::endl;) -return handleConnError(CONN_AUTH_FAILED, -getMessage(ERR_CONN_AUTHFAIL, -this->m_handshakeErrorId.c_str(), -this->m_handshakeErrorMsg.c_str())); -case exec::user::UNKNOWN_FAILURE: -DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Unknown error during handshake." << std::endl;) -return handleConnError(CONN_HANDSHAKE_FAILED, -getMessage(ERR_CONN_UNKNOWN_ERR, -this->m_handshakeErrorId.c_str(), -this->m_handshakeErrorMsg.c_str())); -default: -break; + +switch(this->m_handshakeStatus) { +case exec::user::SUCCESS: +// reset io_service after handshake is validated before running queries +m_io_service.reset(); +return CONN_SUCCESS; +case exec::user::RPC_VERSION_MISMATCH: +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Invalid rpc version. Expected " +<< DRILL_RPC_VERSION << ", actual "<< m_handshakeVersion << "." << std::endl;) +return handleConnError(CONN_BAD_RPC_VER, getMessage(ERR_CONN_BAD_RPC_VER, DRILL_RPC_VERSION, + m_handshakeVersion, + this->m_handshakeErrorId.c_str(), + this->m_handshakeErrorMsg.c_str())); +case exec::user::AUTH_FAILED: +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Authentication failed." << std::endl;) +return handleConnError(CONN_AUTH_FAILED, getMessage(ERR_CONN_AUTHFAIL, + this->m_handshakeErrorId.c_str(), + this->m_handshakeErrorMsg.c_str())); +case exec::user::UNKNOWN_FAILURE: +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Unknown error during handshake." << std::endl;) +return handleConnError(CONN_HANDSHAKE_FAILED, getMessage(ERR_CONN_UNKNOWN_ERR, + this->m_handshakeErrorId.c_str(), + this->m_handshakeErrorMsg.c_str())); +case exec::user::AUTH_REQUIRED: +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Server requires SASL authentication." << std::endl;) +return handleAuthentication(properties); +default: +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "Unknown return status." << std::endl;) +return handleConnError(CONN_HANDSHAKE_FAILED, getMessage(ERR_CONN_UNKNOWN_ERR, + this->m_handshakeErrorId.c_str(), + this->m_handshakeErrorMsg.c_str())); +} +} + +connectionStatus_t DrillClientImpl::handleAuthentication(const DrillUserProperties *userProperties) { +try { +m_saslAuthenticator = new SaslAuthenticatorImpl(userProperties); --- End diff -- - maybe we should use smart pointers - I'm not strongly opinionated against exceptions in C++, but this is not a common pattern in drill code base --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101599452 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" --- End diff -- Why not keeping the SASL names since they are well-defined? (and also uppercased) --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102307826 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserServer.java --- @@ -358,25 +291,59 @@ public BitToUserHandshake getHandshakeResponse(UserToBitHandshake inbound) throw return handleFailure(respBuilder, HandshakeStatus.RPC_VERSION_MISMATCH, errMsg, null); } - if (authenticator != null) { + connection.setHandshake(inbound); + + if (!config.isAuthEnabled()) { + connection.finalizeSession(inbound.getCredentials().getUserName()); +respBuilder.setStatus(HandshakeStatus.SUCCESS); +return respBuilder.build(); + } + + final boolean clientSupportsSasl = inbound.hasSaslSupport() && + (inbound.getSaslSupport().ordinal() >= SaslSupport.SASL_AUTH.ordinal()); + if (!clientSupportsSasl) { // for backward compatibility < 1.10 +final String userName = inbound.getCredentials().getUserName(); +if (logger.isTraceEnabled()) { + logger.trace("User {} on connection {} is likely using an older client.", + userName, connection.getRemoteAddress()); +} try { String password = ""; final UserProperties props = inbound.getProperties(); for (int i = 0; i < props.getPropertiesCount(); i++) { Property prop = props.getProperties(i); -if (UserSession.PASSWORD.equalsIgnoreCase(prop.getKey())) { +if (DrillProperties.PASSWORD.equalsIgnoreCase(prop.getKey())) { password = prop.getValue(); break; } } - authenticator.authenticate(inbound.getCredentials().getUserName(), password); + final PlainFactory plainFactory = config.getAuthProvider().getPlainFactory(); --- End diff -- instead of keeping a deprecated methods, why not looking for `PLAIN` auth mechanism, and instead of calling authenticate, do an internal sasl session? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102278730 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/AbstractClientConnection.java --- @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc; + +import io.netty.channel.socket.SocketChannel; +import org.slf4j.Logger; + +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; + +public abstract class AbstractClientConnection extends AbstractRemoteConnection implements ClientConnection { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(AbstractClientConnection.class); + + private SaslClient saslClient; + + public AbstractClientConnection(SocketChannel channel, String name) { +super(channel, name); + } + + protected abstract Logger getLogger(); + + @Override + public void setSaslClient(final SaslClient saslClient) { +assert this.saslClient == null; +this.saslClient = saslClient; --- End diff -- According to the interface comment, it's supposed to be only set one, but this is not checked. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102279444 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/AbstractServerConnection.java --- @@ -0,0 +1,121 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc; + +import io.netty.channel.socket.SocketChannel; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.hadoop.security.HadoopKerberosName; +import org.apache.hadoop.security.UserGroupInformation; +import org.slf4j.Logger; + +import javax.security.auth.login.LoginException; +import javax.security.sasl.SaslException; +import javax.security.sasl.SaslServer; +import java.io.IOException; + +public abstract class AbstractServerConnection --- End diff -- Your generic type is not fully defined: `AbstractServerConnection>` (it happens all over the place btw. I suspect this is an IntelliJ thing, as Eclipse is printing out lots of warnings. If you can change your IDE settings, and get all the rawtypes warning introduced by your patches, it would be nice!) Although the type definition looks correct, I also wonder if the type to be used shouldn't be `AbstractServerConnection>` instead (as we should rely on the interface instead of the abstract class)? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101123168 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,207 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +s_initialized = true; + +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +sasl_client_init(NULL); +{ // for debugging purposes +const char **mechanisms = sasl_global_listmech(); +int i = 1; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms available on client: " << std::endl;) +while (mechanisms[i] != NULL) { +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << mechanisms[i] << std::endl;) +i++; +} +} +} +} +} + +SaslAuthenticatorImpl::~SaslAuthenticatorImpl() { +if (m_secret) { +free(m_secret); +} +// may be used to negotiated security layers before disposing in the future +if (m_pConnection) { +sasl_dispose(&m_pConnection); +} +m_pConnection = NULL; +} + +typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft + +int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const char **result, unsigned *len) { --- End diff -- smart pointers can be used too here --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101600048 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.hpp --- @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#ifndef DRILLCLIENT_SASLAUTHENTICATORIMPL_HPP +#define DRILLCLIENT_SASLAUTHENTICATORIMPL_HPP + +#include +#include +#include +#include "drill/drillClient.hpp" +#include "UserBitShared.pb.h" + +#include "sasl/sasl.h" +#include "sasl/saslplug.h" + +namespace Drill { + +class SaslAuthenticatorImpl { + +public: + +static const std::map MECHANISM_MAPPING; --- End diff -- not sure why this is public, isn't this something internal? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101602793 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +// loads all the available mechanism and factories in the sasl_lib referenced by the path +const int err = sasl_client_init(NULL); +if (0 != err) { +std::stringstream errMsg; +errMsg << "Failed to load authentication libraries. code: " << err; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;) +throw std::runtime_error(errMsg.str().c_str()); +} +{ // for debugging purposes +const char **mechanisms = sasl_global_listmech(); +int i = 0; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms available on client: " << std::endl;) +while (mechanisms[i] != NULL) { +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << mechanisms[i] << std::endl;) +i++; +} +} +s_initialized = true; +} +} +} + +SaslAuthenticatorImpl::~SaslAuthenticatorImpl() { +if (m_pwd_secret) { +free(m_pwd_secret); +} +// may be used to negotiated security layers before disposing in the future +if (m_pConnection) { +sasl_dispose(&m_pConnection); +} +m_pConnection = NULL; +} + +typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft + +int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const char **result, unsigned *len) { +const std::string* const username = static_cast(context); + +if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id) +&& username != NULL) { +*result = username->c_str(); +// *len = (unsigned int) username->length(); +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void *context, int id, sasl_secret_t **psecret) { +const SaslAuthenticatorImpl* const authenticator = static_cast(context); + +if (SASL_CB_PASS == id) { +*psecret = authenticator->m_pwd_secret; +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::init(const std::vector& mechanisms, exec::shared::SaslMessage& response) { +// find and set parameters +std::string authMechanismToUse; +std::string serviceName; +std::string serviceHost; +for (size_t i = 0; i < m_properties->size(); i++) { +const s
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101598438 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +// loads all the available mechanism and factories in the sasl_lib referenced by the path +const int err = sasl_client_init(NULL); +if (0 != err) { +std::stringstream errMsg; +errMsg << "Failed to load authentication libraries. code: " << err; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;) +throw std::runtime_error(errMsg.str().c_str()); +} +{ // for debugging purposes +const char **mechanisms = sasl_global_listmech(); +int i = 0; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << "SASL mechanisms available on client: " << std::endl;) +while (mechanisms[i] != NULL) { +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << i << " : " << mechanisms[i] << std::endl;) +i++; +} +} +s_initialized = true; +} +} +} + +SaslAuthenticatorImpl::~SaslAuthenticatorImpl() { +if (m_pwd_secret) { +free(m_pwd_secret); +} +// may be used to negotiated security layers before disposing in the future +if (m_pConnection) { +sasl_dispose(&m_pConnection); +} +m_pConnection = NULL; +} + +typedef int (*sasl_callback_proc_t)(void); // see sasl_callback_ft + +int SaslAuthenticatorImpl::userNameCallback(void *context, int id, const char **result, unsigned *len) { +const std::string* const username = static_cast(context); + +if ((SASL_CB_USER == id || SASL_CB_AUTHNAME == id) +&& username != NULL) { +*result = username->c_str(); +// *len = (unsigned int) username->length(); +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::passwordCallback(sasl_conn_t *conn, void *context, int id, sasl_secret_t **psecret) { +const SaslAuthenticatorImpl* const authenticator = static_cast(context); + +if (SASL_CB_PASS == id) { +*psecret = authenticator->m_pwd_secret; +} +return SASL_OK; +} + +int SaslAuthenticatorImpl::init(const std::vector& mechanisms, exec::shared::SaslMessage& response) { +// find and set parameters +std::string authMechanismToUse; +std::string serviceName; +std::string serviceHost; +for (size_t i = 0; i < m_properties->size(); i++) { +const s
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101600545 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" + +#define KERBEROS_SIMPLE_NAME "kerberos" +#define KERBEROS_SASL_NAME "gssapi" +#define PLAIN_NAME "plain" + +const std::map SaslAuthenticatorImpl::MECHANISM_MAPPING = boost::assign::map_list_of +(KERBEROS_SIMPLE_NAME, KERBEROS_SASL_NAME) +(PLAIN_NAME, PLAIN_NAME) +; + +boost::mutex SaslAuthenticatorImpl::s_mutex; +bool SaslAuthenticatorImpl::s_initialized = false; + +SaslAuthenticatorImpl::SaslAuthenticatorImpl(const DrillUserProperties* const properties) : +m_properties(properties), m_pConnection(NULL), m_pwd_secret(NULL) { + +if (!s_initialized) { +boost::lock_guard lock(SaslAuthenticatorImpl::s_mutex); +if (!s_initialized) { +// set plugin path if provided +if (DrillClientConfig::getSaslPluginPath()) { +char *saslPluginPath = const_cast(DrillClientConfig::getSaslPluginPath()); +sasl_set_path(0, saslPluginPath); +} + +// loads all the available mechanism and factories in the sasl_lib referenced by the path +const int err = sasl_client_init(NULL); +if (0 != err) { +std::stringstream errMsg; +errMsg << "Failed to load authentication libraries. code: " << err; +DRILL_MT_LOG(DRILL_LOG(LOG_TRACE) << errMsg << std::endl;) --- End diff -- or inline errMsg directly into the log statement? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101631177 --- Diff: common/src/main/java/org/apache/drill/common/KerberosUtil.java --- @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.common; + +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +public final class KerberosUtil { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(KerberosUtil.class); + + public static final String KERBEROS_SASL_NAME = "GSSAPI"; + + public static final String KERBEROS_SIMPLE_NAME = "KERBEROS"; + + public static final String HOSTNAME_PATTERN = "_HOST"; + + /** + * Returns principal of format primary/instance@REALM. + * + * @param primary non-null primary component + * @param instance non-null instance component + * @param realm non-null realm component + * @return principal of format primary/instance@REALM + */ + public static String getPrincipalFromParts(final String primary, final String instance, final String realm) { +return checkNotNull(primary) + "/" + +checkNotNull(instance) + "@" + +checkNotNull(realm); + } + + /** + * Expects principal of the format primary/instance@REALM. + * + * @param principal principal + * @return components + */ + public static String[] splitPrincipalIntoParts(final String principal) { +final String[] components = principal.split("[/@]"); +checkState(components.length == 3); +checkNotNull(components[0]); +checkNotNull(components[1]); +checkNotNull(components[2]); +return components; + } + + public static String canonicalizedInstanceName(String instanceName, final String canonicalName) { --- End diff -- (style) for consistency, use verb present tense for methods (canonicalizeInstanceName) --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102336062 --- Diff: common/src/main/java/org/apache/drill/common/KerberosUtil.java --- @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.common; + +import static com.google.common.base.Preconditions.checkNotNull; +import static com.google.common.base.Preconditions.checkState; + +public final class KerberosUtil { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(KerberosUtil.class); + + public static final String KERBEROS_SASL_NAME = "GSSAPI"; + + public static final String KERBEROS_SIMPLE_NAME = "KERBEROS"; --- End diff -- why not using the SASL name? (in theory, gssapi can work with other systems than kerberos) --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102348994 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/user/UserServer.java --- @@ -358,25 +291,59 @@ public BitToUserHandshake getHandshakeResponse(UserToBitHandshake inbound) throw return handleFailure(respBuilder, HandshakeStatus.RPC_VERSION_MISMATCH, errMsg, null); } - if (authenticator != null) { + connection.setHandshake(inbound); + + if (!config.isAuthEnabled()) { + connection.finalizeSession(inbound.getCredentials().getUserName()); +respBuilder.setStatus(HandshakeStatus.SUCCESS); +return respBuilder.build(); + } + + final boolean clientSupportsSasl = inbound.hasSaslSupport() && + (inbound.getSaslSupport().ordinal() >= SaslSupport.SASL_AUTH.ordinal()); + if (!clientSupportsSasl) { // for backward compatibility < 1.10 +final String userName = inbound.getCredentials().getUserName(); +if (logger.isTraceEnabled()) { + logger.trace("User {} on connection {} is likely using an older client.", + userName, connection.getRemoteAddress()); +} try { String password = ""; final UserProperties props = inbound.getProperties(); for (int i = 0; i < props.getPropertiesCount(); i++) { Property prop = props.getProperties(i); -if (UserSession.PASSWORD.equalsIgnoreCase(prop.getKey())) { +if (DrillProperties.PASSWORD.equalsIgnoreCase(prop.getKey())) { password = prop.getValue(); break; } } - authenticator.authenticate(inbound.getCredentials().getUserName(), password); + final PlainFactory plainFactory = config.getAuthProvider().getPlainFactory(); + if (plainFactory == null) { +throw new UserAuthenticationException("The server no longer supports username/password" + +" based authentication. Please talk to your system administrator."); + } + plainFactory.getAuthenticator() + .authenticate(userName, password); + connection.changeHandlerTo(config.getMessageHandler()); + connection.finalizeSession(userName); + respBuilder.setStatus(HandshakeStatus.SUCCESS); + if (logger.isTraceEnabled()) { +logger.trace("Authenticated {} successfully using PLAIN from {}", userName, +connection.getRemoteAddress()); + } + return respBuilder.build(); } catch (UserAuthenticationException ex) { return handleFailure(respBuilder, HandshakeStatus.AUTH_FAILED, ex.getMessage(), ex); } } - connection.setUser(inbound); - return respBuilder.setStatus(HandshakeStatus.SUCCESS).build(); + // mention server's authentication capabilities + respBuilder.addAllAuthenticationMechanisms(config.getAuthProvider().getAllFactoryNames()); + + // for now, this means PLAIN credentials will be sent over twice --- End diff -- just a thought if this is something you're worried about. What prevents the server to detect the plain credentials from the user, and try to perform the authentication directly (if plain is supported)? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102303605 --- Diff: exec/rpc/src/main/java/org/apache/drill/exec/rpc/ServerConnection.java --- @@ -0,0 +1,37 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc; + +import javax.security.sasl.SaslServer; +import java.io.IOException; + +public interface ServerConnection extends RemoteConnection { + + // init only once + void initSaslServer(String mechanismName) throws IOException; --- End diff -- should the server connection exposes SASL protocol directly? isn't ServerAuthenticationHandler a better place to manage the saslServer instance? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102342473 --- Diff: contrib/native/client/src/clientlib/saslAuthenticatorImpl.cpp --- @@ -0,0 +1,211 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include "saslAuthenticatorImpl.hpp" + +#include "drillClientImpl.hpp" +#include "logger.hpp" + +namespace Drill { + +#define DEFAULT_SERVICE_NAME "drill" --- End diff -- these defines should probably be (static) constant instead. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102292416 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java --- @@ -105,8 +128,83 @@ protected void finalizeConnection(BitControlHandshake handshake, ControlConnecti connection.setEndpoint(handshake.getEndpoint()); } - public ControlConnection getConnection() { -return this.connection; + @Override + protected RpcCommand + getInitialCommand(final RpcCommand command) { +if (config.getAuthMechanismToUse() == null) { + return super.getInitialCommand(command); +} else { + return new AuthenticationCommand<>(command); --- End diff -- shouldn't we use `super.getInitialCommand(command)` too? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102344235 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/AuthenticationOutcomeListener.java --- @@ -0,0 +1,238 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + *http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security; + +import com.google.common.collect.ImmutableMap; +import com.google.common.collect.Maps; +import com.google.protobuf.ByteString; +import com.google.protobuf.Internal.EnumLite; +import com.google.protobuf.MessageLite; +import io.netty.buffer.ByteBuf; +import org.apache.drill.exec.proto.UserBitShared.SaslMessage; +import org.apache.drill.exec.proto.UserBitShared.SaslStatus; +import org.apache.drill.exec.rpc.BasicClient; +import org.apache.drill.exec.rpc.ClientConnection; +import org.apache.drill.exec.rpc.RpcException; +import org.apache.drill.exec.rpc.RpcOutcomeListener; +import org.apache.hadoop.security.UserGroupInformation; + +import javax.security.sasl.SaslClient; +import javax.security.sasl.SaslException; +import java.io.IOException; +import java.lang.reflect.UndeclaredThrowableException; +import java.security.PrivilegedExceptionAction; +import java.util.EnumMap; +import java.util.Map; + +import static com.google.common.base.Preconditions.checkNotNull; + +public class AuthenticationOutcomeListener +implements RpcOutcomeListener { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(AuthenticationOutcomeListener.class); + + private static final ImmutableMap CHALLENGE_PROCESSORS; + static { +final Map map = new EnumMap<>(SaslStatus.class); +map.put(SaslStatus.SASL_IN_PROGRESS, new SaslInProgressProcessor()); +map.put(SaslStatus.SASL_SUCCESS, new SaslSuccessProcessor()); +map.put(SaslStatus.SASL_FAILED, new SaslFailedProcessor()); +CHALLENGE_PROCESSORS = Maps.immutableEnumMap(map); + } + + private final BasicClient client; + private final R connection; + private final T saslRpcType; + private final UserGroupInformation ugi; + private final RpcOutcomeListener rpcOutcomeListener; + + public AuthenticationOutcomeListener(BasicClient client, + R connection, T saslRpcType, UserGroupInformation ugi, + RpcOutcomeListener rpcOutcomeListener) { +this.client = client; +this.connection = connection; +this.saslRpcType = saslRpcType; +this.ugi = ugi; +this.rpcOutcomeListener = rpcOutcomeListener; + } + + public void initiate(final String mechanismName) { +logger.trace("Initiating SASL exchange."); +try { + final ByteString responseData; + final SaslClient saslClient = connection.getSaslClient(); + if (saslClient.hasInitialResponse()) { +responseData = ByteString.copyFrom(evaluateChallenge(ugi, saslClient, new byte[0])); + } else { +responseData = ByteString.EMPTY; + } + client.send(new AuthenticationOutcomeListener<>(client, connection, saslRpcType, ugi, rpcOutcomeListener), + connection, + saslRpcType, + SaslMessage.newBuilder() + .setMechanism(mechanismName) + .setStatus(SaslStatus.SASL_START) + .setData(responseData) + .build(), + SaslMessage.class, + true /** the connection will not be backed up at this point */); + logger.trace("Initiated SASL exchange."); +} catch (final Exception e) { + rpcOutcomeListener.failed(RpcException.mapException(e)); +} + } + + @Override + public void failed(RpcException ex) { +rpcOutcomeListener.failed(RpcException.mapException(ex)); + } + + @Override + public void success(SaslMessage value, ByteBuf
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102358541 --- Diff: contrib/native/client/cmakeModules/FindSASL.cmake --- @@ -0,0 +1,55 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +# - Try to find Cyrus SASL + +if (MSVC) --- End diff -- Please also update the readme txt files on how to get/install SASL. On Mac 10.11 and higher, it looks like using Apple SASL implementation is deprecated (I get deprecation warnings from the compiler) --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102344797 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/security/ClientAuthenticatorProvider.java --- @@ -0,0 +1,109 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.security; + +import org.apache.drill.common.AutoCloseables; +import org.apache.drill.common.exceptions.DrillRuntimeException; +import org.apache.drill.common.map.CaseInsensitiveMap; +import org.apache.drill.exec.rpc.security.kerberos.KerberosFactory; +import org.apache.drill.exec.rpc.security.plain.PlainFactory; + +import javax.security.sasl.SaslException; +import java.util.Map; +import java.util.Set; + +public class ClientAuthenticatorProvider implements AuthenticatorProvider { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(ClientAuthenticatorProvider.class); + + private static final String customFactories = System.getProperty("customAuthFactories"); --- End diff -- shouldn't this property be prefixed with `drill.`? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102294263 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlConnectionConfig.java --- @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.rpc.control; + +import org.apache.drill.exec.exception.DrillbitStartupException; +import org.apache.drill.exec.memory.BufferAllocator; +import org.apache.drill.exec.proto.CoordinationProtos.DrillbitEndpoint; +import org.apache.drill.exec.rpc.BitConnectionConfig; +import org.apache.drill.exec.server.BootStrapContext; +import org.apache.drill.exec.work.batch.ControlMessageHandler; + +// package private +class ControlConnectionConfig extends BitConnectionConfig { +// private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(ControlConnectionConfig.class); + + private final ControlMessageHandler handler; + + private DrillbitEndpoint localEndpoint; + + ControlConnectionConfig(BufferAllocator allocator, BootStrapContext context, ControlMessageHandler handler) + throws DrillbitStartupException { +super(allocator, context); +this.handler = handler; + } + + @Override + public String getName() { +return "control"; // unused + } + + ControlMessageHandler getMessageHandler() { +return handler; + } + + void setLocalEndpoint(DrillbitEndpoint endpoint) { --- End diff -- Can we avoid modifying the original config to return the endpoint created by the server? it seems to me that the previous approach sounded safer as config is usually considered immutable. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r101605131 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/client/DrillClient.java --- @@ -435,9 +427,14 @@ public synchronized boolean reconnect() { } private void connect(DrillbitEndpoint endpoint) throws RpcException { -final FutureHandler f = new FutureHandler(); -client.connect(f, endpoint, props, getUserCredentials()); -f.checkedGet(); +client.connect(endpoint, properties, getUserCredentials()).checkedGet(); +if (client.serverRequiresAuthentication()) { + try { +client.authenticate(null).checkedGet(); --- End diff -- shouldn't this be done directly by the rpc client? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #578: DRILL-4280: Kerberos Authentication
Github user laurentgo commented on a diff in the pull request: https://github.com/apache/drill/pull/578#discussion_r102299705 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/rpc/control/ControlClient.java --- @@ -89,14 +90,36 @@ public MessageLite getResponseDefaultInstance(int rpcType) throws RpcException { } @Override - protected Response handle(ControlConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody) throws RpcException { -return handler.handle(connection, rpcType, pBody, dBody); + protected void handle(ControlConnection connection, int rpcType, ByteBuf pBody, ByteBuf dBody, +ResponseSender sender) throws RpcException { +connection.getCurrentHandler().handle(connection, rpcType, pBody, dBody, sender); } @Override protected void validateHandshake(BitControlHandshake handshake) throws RpcException { if (handshake.getRpcVersion() != ControlRpcConfig.RPC_VERSION) { - throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION)); + throw new RpcException(String.format("Invalid rpc version. Expected %d, actual %d.", + handshake.getRpcVersion(), ControlRpcConfig.RPC_VERSION)); +} + +if (handshake.getAuthenticationMechanismsCount() != 0) { // remote requires authentication + final SaslClient saslClient; --- End diff -- shouldn't the SASL client instantiation happens in `AuthenticationCommand` (which in turns provide it to `AuthenticationOutcomeRpcListener`). It looks like it would be more contained. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102345408 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/planner/cost/DrillRelMdSelectivity.java --- @@ -0,0 +1,219 @@ +/*** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + **/ +package org.apache.drill.exec.planner.cost; + +import org.apache.calcite.plan.RelOptUtil; +import org.apache.calcite.plan.volcano.RelSubset; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.SingleRel; +import org.apache.calcite.rel.core.JoinRelType; +import org.apache.calcite.rel.core.TableScan; +import org.apache.calcite.rel.metadata.ReflectiveRelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMdSelectivity; +import org.apache.calcite.rel.metadata.RelMdUtil; +import org.apache.calcite.rel.metadata.RelMetadataProvider; +import org.apache.calcite.rel.metadata.RelMetadataQuery; +import org.apache.calcite.rel.type.RelDataType; +import org.apache.calcite.rex.RexBuilder; +import org.apache.calcite.rex.RexCall; +import org.apache.calcite.rex.RexInputRef; +import org.apache.calcite.rex.RexNode; +import org.apache.calcite.rex.RexUtil; +import org.apache.calcite.rex.RexVisitor; +import org.apache.calcite.rex.RexVisitorImpl; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.util.BuiltInMethod; +import org.apache.calcite.util.Util; +import org.apache.drill.exec.planner.common.DrillJoinRelBase; +import org.apache.drill.exec.planner.common.DrillRelOptUtil; +import org.apache.drill.exec.planner.logical.DrillTable; +import org.apache.drill.exec.planner.logical.DrillTranslatableTable; + +import java.util.ArrayList; +import java.util.List; + +public class DrillRelMdSelectivity extends RelMdSelectivity { + private static final org.slf4j.Logger logger = + org.slf4j.LoggerFactory.getLogger(RelMdSelectivity.class); + + private static final DrillRelMdSelectivity INSTANCE = + new DrillRelMdSelectivity(); + + public static final RelMetadataProvider SOURCE = + ReflectiveRelMetadataProvider.reflectiveSource( + BuiltInMethod.SELECTIVITY.method, INSTANCE); + + @Override + public Double getSelectivity(RelNode rel, RexNode predicate) { +if (rel instanceof TableScan) { + return getScanSelectivity((TableScan) rel, predicate); +} else if (rel instanceof DrillJoinRelBase) { + return getJoinSelectivity(((DrillJoinRelBase) rel), predicate); +} else if (rel instanceof SingleRel && !DrillRelOptUtil.guessRows(rel)) { +return RelMetadataQuery.getSelectivity(((SingleRel) rel).getInput(), predicate); +} else if (rel instanceof RelSubset && !DrillRelOptUtil.guessRows(rel)) { + if (((RelSubset) rel).getBest() != null) { +return RelMetadataQuery.getSelectivity(((RelSubset)rel).getBest(), predicate); + } else if (((RelSubset)rel).getOriginal() != null) { +return RelMetadataQuery.getSelectivity(((RelSubset)rel).getOriginal(), predicate); + } else { +return super.getSelectivity(rel, predicate); + } +} else { + return super.getSelectivity(rel, predicate); +} + } + + private Double getJoinSelectivity(DrillJoinRelBase rel, RexNode predicate) { +double sel = 1.0; +// determine which filters apply to the left vs right +RexNode leftPred = null; +RexNode rightPred = null; +JoinRelType joinType = rel.getJoinType(); +final RexBuilder rexBuilder = rel.getCluster().getRexBuilder(); +int[] adjustments = new int[rel.getRowType().getFieldCount()]; + +if (DrillRelOptUtil.guessRows(rel)) { + return super.getSelectivity(rel, predicate); +} + +if (predicate !=
[jira] [Resolved] (DRILL-5103) External Sort pop shadows variable "initialAllocation" from AbstractBase
[ https://issues.apache.org/jira/browse/DRILL-5103?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Paul Rogers resolved DRILL-5103. Resolution: Fixed > External Sort pop shadows variable "initialAllocation" from AbstractBase > > > Key: DRILL-5103 > URL: https://issues.apache.org/jira/browse/DRILL-5103 > Project: Apache Drill > Issue Type: Sub-task >Affects Versions: 1.8.0 >Reporter: Paul Rogers >Assignee: Paul Rogers > > The Physical Operator (pop) definition for External sort contains the > following: > {code} > public class ExternalSort extends Sort { > private long initialAllocation = 2000; > {code} > But, > {code} > public abstract class AbstractBase implements PhysicalOperator{ > protected long initialAllocation = 100L; > {code} > The result is that the variable in {{ExternalSort}} shadows the one in > {{AbstractBase}}. Jackson deserialization sets the one in {{AbstractBase}}, > but the method {{getInitialAllocation()}} returns the shadowed one in > {{ExternalSort}}. > Remove the shadowed {{initialAllocation}} from {{ExternalSort}} and move the > initializer into the constructor. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102327621 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsAggBatch.java --- @@ -0,0 +1,256 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.sun.codemodel.JExpr; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.exec.exception.ClassTransformationException; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ClassGenerator; +import org.apache.drill.exec.expr.CodeGenerator; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.ValueVectorWriteExpression; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsAggregate; +import org.apache.drill.exec.physical.impl.aggregate.StreamingAggBatch; +import org.apache.drill.exec.physical.impl.aggregate.StreamingAggTemplate; +import org.apache.drill.exec.physical.impl.aggregate.StreamingAggregator; +import org.apache.drill.exec.planner.physical.StatsAggPrel.OperatorPhase; +import org.apache.drill.exec.record.BatchSchema.SelectionVectorMode; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TypedFieldId; +import org.apache.drill.exec.store.ImplicitColumnExplorer; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.complex.FieldIdUtil; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.IOException; +import java.util.GregorianCalendar; +import java.util.List; +import java.util.TimeZone; + +/** + * TODO: This needs cleanup. Currently the key values are constants and we compare the constants + * for every record. Seems unnecessary. + * + * Example input and output: + * Schema of incoming batch: region_id (VARCHAR), sales_city (VARCHAR), cnt (BIGINT) + * Schema of output: + *"schema" : BIGINT - Schema number. For each schema change this number is incremented. + *"computed" : BIGINT - What time is it computed? + *"columns" : MAP - Column names + * "region_id" : VARCHAR + * "sales_city" : VARCHAR + * "cnt": VARCHAR + *"statscount" : MAP + * "region_id" : BIGINT - statscount(region_id) - aggregation over all values of region_id + * in incoming batch + * "sales_city" : BIGINT - statscount(sales_city) + * "cnt": BIGINT - statscount(cnt) + *"nonnullstatcount" : MAP + * "region_id" : BIGINT - nonnullstatcount(region_id) + * "sales_city" : BIGINT - nonnullstatcount(sales_city) + * "cnt": BIGINT - nonnullstatcount(cnt) + * another map for next stats function + */ +public class StatisticsAggBatch extends StreamingAggBatch { + private List functions; + private int schema = 0; + + public StatisticsAggBatch(StatisticsAggregate popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, incoming, context); +this.functions = popConfig.getFunctions(); + } + + private void createKeyColumn(String name, LogicalExpression expr, List keyExprs, + List ke
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102326907 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[jira] [Created] (DRILL-5287) Provide option to skip updates of ephemeral state changes in Zookeeper
Padma Penumarthy created DRILL-5287: --- Summary: Provide option to skip updates of ephemeral state changes in Zookeeper Key: DRILL-5287 URL: https://issues.apache.org/jira/browse/DRILL-5287 Project: Apache Drill Issue Type: Bug Affects Versions: 1.9.0 Reporter: Padma Penumarthy Assignee: Padma Penumarthy Fix For: 1.10 We put transient profiles in zookeeper and update state as query progresses and changes states. It is observed that this adds latency of ~45msec for each update in the query execution path. This gets even worse when high number of concurrent queries are in progress. For concurrency=100, the average query response time even for short queries is 8 sec vs 0.2 sec with these updates disabled. For short lived queries in a high-throughput scenario, it is of no value to update state changes in zookeeper. We need an option to disable these updates for short running operational queries. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102325216 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102324705 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102324596 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102323849 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102322555 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102320614 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102319897 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102316247 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/config/UnpivotMaps.java --- @@ -0,0 +1,59 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.config; + +import java.util.List; + +import org.apache.drill.exec.physical.base.AbstractSingle; +import org.apache.drill.exec.physical.base.PhysicalOperator; +import org.apache.drill.exec.physical.base.PhysicalVisitor; +import org.apache.drill.exec.proto.UserBitShared.CoreOperatorType; + +import com.fasterxml.jackson.annotation.JsonCreator; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonTypeName; + +@JsonTypeName("unpivot-maps") +public class UnpivotMaps extends AbstractSingle { --- End diff -- This is generic in the sense that it can unpivot any given set of maps given that they are of the same size and contain the same keys(columns). --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102314383 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillStatsTable.java --- @@ -0,0 +1,347 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.planner.common; + +import java.io.IOException; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonGetter; +import com.fasterxml.jackson.annotation.JsonSetter; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.annotation.JsonSubTypes; +import com.fasterxml.jackson.annotation.JsonTypeInfo; +import com.fasterxml.jackson.annotation.JsonTypeName; +import com.fasterxml.jackson.databind.DeserializationFeature; +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Stopwatch; +import com.google.common.collect.Maps; +import org.apache.calcite.rel.RelNode; +import org.apache.calcite.rel.RelVisitor; +import org.apache.calcite.rel.core.TableScan; +import org.apache.drill.common.exceptions.DrillRuntimeException; +import org.apache.drill.exec.ops.QueryContext; +import org.apache.drill.exec.planner.logical.DrillTable; +import org.apache.drill.exec.util.ImpersonationUtil; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.joda.time.DateTime; + +/** + * Wraps the stats table info including schema and tableName. Also materializes stats from storage + * and keeps them in memory. + */ +public class DrillStatsTable { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(DrillStatsTable.class); + private final FileSystem fs; + private final Path tablePath; + + /** + * List of columns in stats table. + */ + public static final String COL_COLUMN = "column"; + public static final String COL_COMPUTED = "computed"; + public static final String COL_STATCOUNT = "statcount"; + public static final String COL_NDV = "ndv"; + + private final String schemaName; + private final String tableName; + + private final Map ndv = Maps.newHashMap(); + private double rowCount = -1; + + private boolean materialized = false; + + private TableStatistics statistics = null; + + public DrillStatsTable(String schemaName, String tableName, Path tablePath, FileSystem fs) { +this.schemaName = schemaName; +this.tableName = tableName; +this.tablePath = tablePath; +this.fs = ImpersonationUtil.createFileSystem(ImpersonationUtil.getProcessUserName(), fs.getConf()); + } + + public String getSchemaName() { +return schemaName; + } + + public String getTableName() { +return tableName; + } + /** + * Get number of distinct values of given column. If stats are not present for the given column, + * a null is returned. + * + * Note: returned data may not be accurate. Accuracy depends on whether the table data has changed after the + * stats are computed. + * + * @param col + * @return + */ + public Double getNdv(String col) { +// Stats might not have materialized because of errors. +if (!materialized) { + return null; +} +final String upperCol = col.toUpperCase(); +final Long ndvCol = ndv.get(upperCol); +// Ndv estimation techniques like HLL may over-estimate, hence cap it at rowCount +if (ndvCol != null) { + return Math.min(ndvCol, rowCount); --- End diff -- Histograms would help with the data skew. When we have histograms, the NDV would be obtained from the Histograms. Stats will be off by default (so not as risky?), and the exis
[jira] [Created] (DRILL-5286) When rel and target candidate set is the same, planner should not need to do convert for the relNode since it must have been done
Chunhui Shi created DRILL-5286: -- Summary: When rel and target candidate set is the same, planner should not need to do convert for the relNode since it must have been done Key: DRILL-5286 URL: https://issues.apache.org/jira/browse/DRILL-5286 Project: Apache Drill Issue Type: Bug Reporter: Chunhui Shi Assignee: Chunhui Shi -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102310795 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102306890 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill issue #738: DRILL-5190: Display planning and queued time for a query i...
Github user sudheeshkatkam commented on the issue: https://github.com/apache/drill/pull/738 +1 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #751: DRILL-5259: Allow listing a user-defined number of profile...
Github user sudheeshkatkam commented on the issue: https://github.com/apache/drill/pull/751 +1 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[jira] [Resolved] (DRILL-5203) Provide per-test control over logging for unit tests
[ https://issues.apache.org/jira/browse/DRILL-5203?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Paul Rogers resolved DRILL-5203. Resolution: Fixed Added as part of the new "cluster fixture" unit test framework. > Provide per-test control over logging for unit tests > > > Key: DRILL-5203 > URL: https://issues.apache.org/jira/browse/DRILL-5203 > Project: Apache Drill > Issue Type: Improvement > Components: Tools, Build & Test >Affects Versions: 1.9.0 >Reporter: Paul Rogers >Assignee: Paul Rogers >Priority: Minor > > Drill provides extensive logging. In production, users typically turn on all > logging to some useful level, say WARN for normal operation or DEBUG when > problems occur. > Drill has a wide variety of unit tests, each of which covers some particular > part of the product. When working in that area, we wish to turn on logging > just for that one area, and often just for the test being used to drive > development (in Test-Driven Development fashion.) > Today, we control logging only via the single, shared {{logback.xml}} file in > {{$DRILL_HOME/conf}} in production, and the {{logback-test.xml}} file in the > {{src/resources}} directory during development. This is a very blunt tool: it > affects all tests and is cumbersome to use on a per-test basis. > This, then is the motivation for a "log fixture": a simple piece of code that > lets us turn on very targeted logging for the duration of a single test, then > restore the defaults afterwards. > Example: > {code} > @Test > public void testSomething() throws Exception { > LogFixtureBuilder logBuilder = LogFixture.builder() > .toConsole() > .disable() // Turn off all logging... > // Except for this one logger > .logger(ExternalSortBatch.class, Level.DEBUG); > try (LogFixture logs = logBuilder.build()) { > // Do the test here: the one logger is set to debug to the console > } > // Logging back to configured settings > } > {code} > Alternatively, the settings can affect an entire test class: > {code} > private static LogFixture logFixture; > @BeforeClass > public static void setup() { > logFixture = LogFixture.builder() > .toConsole() > .disable() > .logger(ExternalSortBatch.class, Level.DEBUG) > .build(); > } > @AfterClass > public void tearDown() { > logFixture.close(); > } > {code} -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[GitHub] drill pull request #754: DRILL-5275: Sort spill is slow due to repeated allo...
Github user Ben-Zvi commented on a diff in the pull request: https://github.com/apache/drill/pull/754#discussion_r102295524 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/cache/VectorAccessibleSerializable.java --- @@ -57,6 +57,12 @@ private BatchSchema.SelectionVectorMode svMode = BatchSchema.SelectionVectorMode.NONE; private SelectionVector2 sv2; + /** + * Disk I/O buffer used for all reads and writes of DrillBufs. + */ + + private byte buffer[] = new byte[32*1024]; --- End diff -- (1) Is 32K too small ? (because it is heap?) Does a higher size perform better ? (2) Would it work better if the operator would allocate this serialization buffer from the direct memory (and only when spilling), and then free it at the end ? --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102295174 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102295106 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[GitHub] drill pull request #742: DRILL-5242: The UI breaks when rendering profiles h...
Github user kkhatua closed the pull request at: https://github.com/apache/drill/pull/742 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #742: DRILL-5242: The UI breaks when rendering profiles having u...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/742 This closes PR #742 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill issue #739: DRILL-5230: Translation of millisecond duration into hours...
Github user kkhatua commented on the issue: https://github.com/apache/drill/pull/739 This closes PR #739 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #739: DRILL-5230: Translation of millisecond duration int...
Github user kkhatua closed the pull request at: https://github.com/apache/drill/pull/739 --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102294356 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[jira] [Created] (DRILL-5285) Provide detailed, accurate estimate of size consumed by a record batch
Paul Rogers created DRILL-5285: -- Summary: Provide detailed, accurate estimate of size consumed by a record batch Key: DRILL-5285 URL: https://issues.apache.org/jira/browse/DRILL-5285 Project: Apache Drill Issue Type: Sub-task Affects Versions: 1.10.0 Reporter: Paul Rogers Assignee: Paul Rogers Fix For: 1.10.0 DRILL-5080 introduced a {{RecordBatchSizer}} that estimates the space taken by a record batch and determines batch "density." Drill provides a large variety of vectors, each with their own internal structure and collections of vectors. For example, fixed vectors use just a data vector. Nullable vectors add an "is set" vector. Variable length vectors add an offset vector. Repeated vectors add a second offset vector. The original {{RecordBatchSizer}} attempted to compute sizes for all these vector types. But, the complexity got to be out of hand. This ticket requests to simply bite the bullet and move the calculations into each vector type so that the {{RecordBatchSizer}} can simply use the results of the calculations. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102292075 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatch.java --- @@ -0,0 +1,532 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.apache.drill.common.expression.LogicalExpression; +import org.apache.drill.common.expression.ErrorCollector; +import org.apache.drill.common.expression.ErrorCollectorImpl; +import org.apache.drill.common.expression.FunctionCallFactory; +import org.apache.drill.common.expression.SchemaPath; +import org.apache.drill.common.expression.ValueExpressions; +import org.apache.drill.common.types.TypeProtos; +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.ExpressionTreeMaterializer; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.expr.holders.BigIntHolder; +import org.apache.drill.exec.expr.holders.NullableFloat8Holder; +import org.apache.drill.exec.expr.holders.ObjectHolder; +import org.apache.drill.exec.expr.holders.ValueHolder; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.DateVector; +import org.apache.drill.exec.vector.BigIntVector; +import org.apache.drill.exec.vector.NullableBigIntVector; +import org.apache.drill.exec.vector.NullableFloat8Vector; +import org.apache.drill.exec.vector.NullableVarBinaryVector; +import org.apache.drill.exec.vector.VarCharVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import java.io.ByteArrayInputStream; +import java.io.DataInputStream; +import java.io.IOException; +import java.util.List; +import java.util.GregorianCalendar; +import java.util.HashMap; +import java.util.Map; +import java.util.TimeZone; + +import com.clearspring.analytics.stream.cardinality.HyperLogLog; + +public class StatisticsMergeBatch extends AbstractSingleRecordBatch { + private static final org.slf4j.Logger logger = org.slf4j.LoggerFactory.getLogger(StatisticsMergeBatch.class); + private Map functions; + private boolean first = true; + private boolean finished = false; + private int schema = 0; + private int recordCount = 0; + private List keyList = null; + private Map dataSrcVecMap = null; + // Map of non-map fields to VV in the incoming schema + private Map copySrcVecMap = null; + private Map> aggregationMap = null; + public StatisticsMergeBatch(StatisticsMerge popConfig, RecordBatch incoming, + FragmentContext context) throws OutOfMemoryException { +super(popConfig, context, incoming); +this.functions = new HashMap<>(); +this.aggregationMap = new HashMap<>(); + +/*for (String key : popConfig.getFunctions()) { + aggregationMap.put(key, new HashMap()); + if (key.equalsIgnoreCase("statcount") || key.equalsIgnoreCase("nonnullstatcount")) { +functions.put(key, "sum"); + } else if (key.equalsIgnoreCase("hll")) { +functions.put(key, "hll_merge"); + } else if (key.equalsIgnoreCase("sum_width")) { +functions.put(key, "avg_width"); +
[jira] [Created] (DRILL-5284) Roll-up of final fixes for managed sort
Paul Rogers created DRILL-5284: -- Summary: Roll-up of final fixes for managed sort Key: DRILL-5284 URL: https://issues.apache.org/jira/browse/DRILL-5284 Project: Apache Drill Issue Type: Bug Affects Versions: 1.10.0 Reporter: Paul Rogers Assignee: Paul Rogers Fix For: 1.10.0 The managed external sort was introduced in DRILL-5080. Since that time, extensive testing has identified a number of minor fixes and improvements. Given the long PR cycles, it is not practical to spend a week or two to do a PR for each fix individually. This ticket represents a roll-up of a combination of a number of fixes. Small fixes are listed here, larger items appear as sub-tasks. -- This message was sent by Atlassian JIRA (v6.3.15#6346)
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102291655 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/planner/common/DrillFilterRelBase.java --- @@ -113,6 +114,6 @@ public double getRows() { selectivity = filterMaxSelectivityEstimateFactor; } } -return selectivity*RelMetadataQuery.getRowCount(getInput()); +return NumberUtil.multiply(selectivity, RelMetadataQuery.getRowCount(getInput())); --- End diff -- Done --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102291099 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/unpivot/UnpivotMapsRecordBatch.java --- @@ -0,0 +1,276 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.unpivot; + +import java.util.List; +import java.util.Map; + +import org.apache.drill.common.types.TypeProtos.MajorType; +import org.apache.drill.common.types.TypeProtos.MinorType; +import org.apache.drill.exec.exception.OutOfMemoryException; +import org.apache.drill.exec.exception.SchemaChangeException; +import org.apache.drill.exec.expr.TypeHelper; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.UnpivotMaps; +import org.apache.drill.exec.record.AbstractSingleRecordBatch; +import org.apache.drill.exec.record.MaterializedField; +import org.apache.drill.exec.record.RecordBatch; +import org.apache.drill.exec.record.TransferPair; +import org.apache.drill.exec.record.VectorContainer; +import org.apache.drill.exec.record.VectorWrapper; +import org.apache.drill.exec.vector.ValueVector; +import org.apache.drill.exec.vector.complex.MapVector; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; + +/** + * Unpivot maps. Assumptions are: + * 1) all child vectors in a map are of same type. + * 2) Each map contains the same number of fields and field names are also same (types could be different). + * + * Example input and output: + * Schema of input: + *"schema": BIGINT - Schema number. For each schema change this number is incremented. + *"computed" : BIGINT - What time is it computed? + *"columns" : MAP - Column names + * "region_id" : VARCHAR + * "sales_city" : VARCHAR + * "cnt": VARCHAR + *"statscount" : MAP + * "region_id" : BIGINT - statscount(region_id) - aggregation over all values of region_id + * in incoming batch + * "sales_city" : BIGINT - statscount(sales_city) + * "cnt": BIGINT - statscount(cnt) + *"nonnullstatcount" : MAP + * "region_id" : BIGINT - nonnullstatcount(region_id) + * "sales_city" : BIGINT - nonnullstatcount(sales_city) + * "cnt": BIGINT - nonnullstatcount(cnt) + * another map for next stats function + * + * Schema of output: --- End diff -- For now, we stick to the original reviewed design. We can revisit the design later, if required. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102290203 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/statistics/StatisticsMergeBatchCreator.java --- @@ -0,0 +1,39 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.statistics; + +import com.google.common.base.Preconditions; +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.StatisticsMerge; +import org.apache.drill.exec.physical.impl.BatchCreator; +import org.apache.drill.exec.record.CloseableRecordBatch; +import org.apache.drill.exec.record.RecordBatch; + +import java.util.List; + +@SuppressWarnings("unused") --- End diff -- Copy-paste artifact. Removed it. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---
[GitHub] drill pull request #729: Drill 1328: Support table statistics for Parquet
Github user gparai commented on a diff in the pull request: https://github.com/apache/drill/pull/729#discussion_r102290224 --- Diff: exec/java-exec/src/main/java/org/apache/drill/exec/physical/impl/unpivot/UnpivotMapsBatchCreator.java --- @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.physical.impl.unpivot; + +import java.util.List; + +import org.apache.drill.common.exceptions.ExecutionSetupException; +import org.apache.drill.exec.ops.FragmentContext; +import org.apache.drill.exec.physical.config.UnpivotMaps; +import org.apache.drill.exec.physical.impl.BatchCreator; +import org.apache.drill.exec.record.CloseableRecordBatch; +import org.apache.drill.exec.record.RecordBatch; + +import com.google.common.base.Preconditions; + +@SuppressWarnings("unused") --- End diff -- Copy-paste artifact. Removed it. --- If your project is set up for it, you can reply to this email and have your reply appear on GitHub as well. If your project does not have this feature enabled and wishes so, or if the feature is enabled but not working, please contact infrastructure at infrastruct...@apache.org or file a JIRA ticket with INFRA. ---