tdg5 commented on a change in pull request #29024: URL: https://github.com/apache/spark/pull/29024#discussion_r755337180
########## File path: sql/core/src/main/scala/org/apache/spark/sql/execution/datasources/jdbc/connection/ConnectionProvider.scala ########## @@ -18,60 +18,45 @@ package org.apache.spark.sql.execution.datasources.jdbc.connection import java.sql.{Connection, Driver} -import java.util.Properties +import java.util.ServiceLoader -import org.apache.spark.internal.Logging -import org.apache.spark.sql.execution.datasources.jdbc.JDBCOptions - -/** - * Connection provider which opens connection toward various databases (database specific instance - * needed). If kerberos authentication required then it's the provider's responsibility to set all - * the parameters. - */ -private[jdbc] trait ConnectionProvider { - /** - * Additional properties for data connection (Data source property takes precedence). - */ - def getAdditionalProperties(): Properties = new Properties() +import scala.collection.mutable - /** - * Opens connection toward the database. - */ - def getConnection(): Connection -} +import org.apache.spark.internal.Logging +import org.apache.spark.security.SecurityConfigurationLock +import org.apache.spark.sql.jdbc.JdbcConnectionProvider +import org.apache.spark.util.Utils private[jdbc] object ConnectionProvider extends Logging { - def create(driver: Driver, options: JDBCOptions): ConnectionProvider = { - if (options.keytab == null || options.principal == null) { - logDebug("No authentication configuration found, using basic connection provider") - new BasicConnectionProvider(driver, options) - } else { - logDebug("Authentication configuration found, using database specific connection provider") - options.driverClass match { - case PostgresConnectionProvider.driverClass => - logDebug("Postgres connection provider found") - new PostgresConnectionProvider(driver, options) - - case MariaDBConnectionProvider.driverClass => - logDebug("MariaDB connection provider found") - new MariaDBConnectionProvider(driver, options) - - case DB2ConnectionProvider.driverClass => - logDebug("DB2 connection provider found") - new DB2ConnectionProvider(driver, options) - - case MSSQLConnectionProvider.driverClass => - logDebug("MS SQL connection provider found") - new MSSQLConnectionProvider(driver, options) - - case OracleConnectionProvider.driverClass => - logDebug("Oracle connection provider found") - new OracleConnectionProvider(driver, options) - - case _ => - throw new IllegalArgumentException(s"Driver ${options.driverClass} does not support " + - "Kerberos authentication") + private val providers = loadProviders() + + def loadProviders(): Seq[JdbcConnectionProvider] = { + val loader = ServiceLoader.load(classOf[JdbcConnectionProvider], + Utils.getContextOrSparkClassLoader) + val providers = mutable.ArrayBuffer[JdbcConnectionProvider]() + + val iterator = loader.iterator + while (iterator.hasNext) { + try { + val provider = iterator.next + logDebug(s"Loaded built in provider: $provider") + providers += provider + } catch { + case t: Throwable => + logError(s"Failed to load built in provider.", t) } } + // Seems duplicate but it's needed for Scala 2.13 + providers.toSeq + } + + def create(driver: Driver, options: Map[String, String]): Connection = { + val filteredProviders = providers.filter(_.canHandle(driver, options)) + require(filteredProviders.size == 1, + "JDBC connection initiated but not exactly one connection provider found which can handle " + + s"it. Found active providers: ${filteredProviders.mkString(", ")}") + SecurityConfigurationLock.synchronized { Review comment: I don't know anything about the underlying problem, but it had crossed my mind that allowing the synchronization to be optional could be one path forward. @gaborgsomogyi may I trouble you for some references related to > The main problem is that JVM has a single security context. In order to make a connection one MUST modify the security context, otherwise the JDBC connector is not able to provide the security credentials (TGT, keytab or whatever) so I can better familiarize myself with the problem that you are describing? Beyond this particular issue, what you've shared suggests that the concurrency utilized by my app could be causing us to crosswire data which would be a major problem. I guess I'd also ask, is there more to it than you described? It sounds like I should either have some cross wired data or if that's not the case then there is some missing piece of the puzzle that means the synchronization is not always required. Thanks in advance! -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org