[ https://issues.apache.org/jira/browse/SPARK-21480?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16094351#comment-16094351 ]
Jack Hu commented on SPARK-21480: --------------------------------- The issue seems resolved in the latest Hive: [HIVE-15551|https://issues.apache.org/jira/browse/HIVE-15551] > Memory leak in > org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult > ---------------------------------------------------------------------------------- > > Key: SPARK-21480 > URL: https://issues.apache.org/jira/browse/SPARK-21480 > Project: Spark > Issue Type: Bug > Components: SQL > Affects Versions: 2.1.1 > Reporter: Jack Hu > > There is a memory leak in Hive with MySQL in > {{org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult}}, > which creates SQL statements but does not close them. > Here is a simple example to recreate it: > {code:java} > val port = 19999 > val stream = ssc.socketTextStream("host", port).map(x => > (x,x)).updateStateByKey( > (inputs : Seq[String], s : Option[String]) => > inputs.lastOption.orElse(s) > ) > stream.foreachRDD((rdd, t) => { > > hiveContext.sparkSession.createDataFrame(rdd).write.mode("overwrite").saveAsTable("t") > } > {code} > Here are the Hive settings > {code} > hive.metastore.warehouse.dir=file:///user/hive/warehouse > javax.jdo.option.ConnectionURL=jdbc:mysql://ip:3306/hive > spark.sql.warehouse.dir=file:///user/hive/warehouse > javax.jdo.option.ConnectionDriveName=com.mysql.jdbc.Driver > javax.jdo.option.ConnectionUserName=hive > javax.jdo.option.ConnectionPassword=hive > hive.exec.dynamic.partition.mode=nonstrict > {code} > After executing for a while, there are many instances of > {{com.mysql.jdbc.JDBC42ResultSet}} and {{com.mysql.jdbc.StatementImpl}}, and they > keep increasing. 
> After attaching a debugger, we found that the statements created in > {{org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult}} are > not closed > {code:java} > private void executeNoResult(final String queryText) throws SQLException { > JDOConnection jdoConn = pm.getDataStoreConnection(); > boolean doTrace = LOG.isDebugEnabled(); > try { > long start = doTrace ? System.nanoTime() : 0; > > ((Connection)jdoConn.getNativeConnection()).createStatement().execute(queryText); > timingTrace(doTrace, queryText, start, doTrace ? System.nanoTime() : 0); > } finally { > jdoConn.close(); // We must release the connection before we call other > pm methods. > } > } > {code} > The reference call stack is > {code:java} > at com.mysql.jdbc.JDBC42ResultSet.<init>(JDBC42ResultSet.java:44) > at sun.reflect.GeneratedConstructorAccessor14.newInstance(Unknown > Source) > at > sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45) > at java.lang.reflect.Constructor.newInstance(Constructor.java:422) > at com.mysql.jdbc.Util.handleNewInstance(Util.java:404) > at com.mysql.jdbc.ResultSetImpl.getInstance(ResultSetImpl.java:319) > at com.mysql.jdbc.MysqlIO.buildResultSetWithUpdates(MysqlIO.java:3114) > at com.mysql.jdbc.MysqlIO.readResultsForQueryOrUpdate(MysqlIO.java:3014) > at com.mysql.jdbc.MysqlIO.readAllResults(MysqlIO.java:2280) > at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2673) > at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2546) > at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2504) > at com.mysql.jdbc.StatementImpl.executeInternal(StatementImpl.java:840) > at com.mysql.jdbc.StatementImpl.execute(StatementImpl.java:740) > at com.jolbox.bonecp.StatementHandle.execute(StatementHandle.java:254) > at > org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult(MetaStoreDirectSql.java:233) > at > 
org.apache.hadoop.hive.metastore.MetaStoreDirectSql.doDbSpecificInitializationsBeforeQuery(MetaStoreDirectSql.java:222) > at > org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getDatabase(MetaStoreDirectSql.java:263) > at > org.apache.hadoop.hive.metastore.ObjectStore$1.getSqlResult(ObjectStore.java:578) > at > org.apache.hadoop.hive.metastore.ObjectStore$1.getSqlResult(ObjectStore.java:575) > at > org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.run(ObjectStore.java:2385) > at > org.apache.hadoop.hive.metastore.ObjectStore.getDatabaseInternal(ObjectStore.java:575) > at > org.apache.hadoop.hive.metastore.ObjectStore.getDatabase(ObjectStore.java:559) > at sun.reflect.GeneratedMethodAccessor60.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at > org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:114) > at com.sun.proxy.$Proxy9.getDatabase(Unknown Source) > at > org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_database_core(HiveMetaStore.java:956) > at > org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_database(HiveMetaStore.java:930) > at sun.reflect.GeneratedMethodAccessor62.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at > org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107) > at com.sun.proxy.$Proxy11.get_database(Unknown Source) > at > org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getDatabase(HiveMetaStoreClient.java:1158) > at sun.reflect.GeneratedMethodAccessor61.invoke(Unknown Source) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:497) > at > 
org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156) > at com.sun.proxy.$Proxy12.getDatabase(Unknown Source) > at org.apache.hadoop.hive.ql.metadata.Hive.getDatabase(Hive.java:1301) > at > org.apache.hadoop.hive.ql.metadata.Hive.databaseExists(Hive.java:1290) > at > org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$databaseExists$1.apply$mcZ$sp(HiveClientImpl.scala:346) > at > org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$databaseExists$1.apply(HiveClientImpl.scala:346) > at > org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$databaseExists$1.apply(HiveClientImpl.scala:346) > at > org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:279) > at > org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:226) > at > org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:225) > at > org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:268) > at > org.apache.spark.sql.hive.client.HiveClientImpl.databaseExists(HiveClientImpl.scala:345) > at > org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:178) > at > org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:178) > at > org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:178) > at > org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97) > at > org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:177) > at > org.apache.spark.sql.catalyst.catalog.SessionCatalog.databaseExists(SessionCatalog.scala:190) > at > org.apache.spark.sql.catalyst.catalog.SessionCatalog.org$apache$spark$sql$catalyst$catalog$SessionCatalog$$requireDbExists(SessionCatalog.scala:130) > at > 
org.apache.spark.sql.catalyst.catalog.SessionCatalog.getTableMetadata(SessionCatalog.scala:288) > at > org.apache.spark.sql.catalyst.catalog.SessionCatalog.getTempViewOrPermanentTableMetadata(SessionCatalog.scala:464) > at > org.apache.spark.sql.internal.CatalogImpl.org$apache$spark$sql$internal$CatalogImpl$$makeTable(CatalogImpl.scala:103) > at > org.apache.spark.sql.internal.CatalogImpl$$anonfun$2.apply(CatalogImpl.scala:98) > at > org.apache.spark.sql.internal.CatalogImpl$$anonfun$2.apply(CatalogImpl.scala:98) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234) > at > scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59) > at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48) > at scala.collection.TraversableLike$class.map(TraversableLike.scala:234) > at scala.collection.AbstractTraversable.map(Traversable.scala:104) > at > org.apache.spark.sql.internal.CatalogImpl.listTables(CatalogImpl.scala:98) > {code} -- This message was sent by Atlassian JIRA (v6.4.14#64029) --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@spark.apache.org For additional commands, e-mail: issues-h...@spark.apache.org