[ https://issues.apache.org/jira/browse/SPARK-21480?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16094351#comment-16094351 ]

Jack Hu commented on SPARK-21480:
---------------------------------

The issue seems to be resolved in the latest Hive: [HIVE-15551|https://issues.apache.org/jira/browse/HIVE-15551]
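
The fix there closes the JDBC statement created in {{executeNoResult}}. A minimal sketch of the pattern, assuming the fix simply closes the statement (not necessarily the exact HIVE-15551 patch):

{code:java}
// Assumes the surrounding MetaStoreDirectSql context (pm, LOG, timingTrace);
// needs java.sql.Connection, java.sql.Statement, javax.jdo.datastore.JDOConnection.
private void executeNoResult(final String queryText) throws SQLException {
  JDOConnection jdoConn = pm.getDataStoreConnection();
  boolean doTrace = LOG.isDebugEnabled();
  try {
    long start = doTrace ? System.nanoTime() : 0;
    // try-with-resources closes the Statement even if execute() throws, so
    // StatementImpl/ResultSet instances no longer pile up on the pooled connection.
    try (Statement stmt = ((Connection) jdoConn.getNativeConnection()).createStatement()) {
      stmt.execute(queryText);
    }
    timingTrace(doTrace, queryText, start, doTrace ? System.nanoTime() : 0);
  } finally {
    jdoConn.close(); // We must release the connection before we call other pm methods.
  }
}
{code}

With the statement closed per call, the {{com.mysql.jdbc.StatementImpl}} and {{com.mysql.jdbc.JDBC42ResultSet}} counts in a heap histogram should stay flat across streaming batches.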

> Memory leak in org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult
> ----------------------------------------------------------------------------------
>
>                 Key: SPARK-21480
>                 URL: https://issues.apache.org/jira/browse/SPARK-21480
>             Project: Spark
>          Issue Type: Bug
>          Components: SQL
>    Affects Versions: 2.1.1
>            Reporter: Jack Hu
>
> There is a memory leak in Hive with MySQL in {{org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult}}, which creates SQL statements but never closes them.
> Here is a simple program to reproduce it:
> {code:java}
> val port = 19999
> val stream = ssc.socketTextStream("host", port)
>   .map(x => (x, x))
>   .updateStateByKey(
>     (inputs: Seq[String], s: Option[String]) => inputs.lastOption.orElse(s)
>   )
> stream.foreachRDD((rdd, t) => {
>   // each batch overwrites table "t", driving repeated metastore calls
>   hiveContext.sparkSession.createDataFrame(rdd).write.mode("overwrite").saveAsTable("t")
> })
> {code}
> Here are the Hive settings:
> {code}
> hive.metastore.warehouse.dir=file:///user/hive/warehouse
> javax.jdo.option.ConnectionURL=jdbc:mysql://ip:3306/hive
> spark.sql.warehouse.dir=file:///user/hive/warehouse
> javax.jdo.option.ConnectionDriverName=com.mysql.jdbc.Driver
> javax.jdo.option.ConnectionUserName=hive
> javax.jdo.option.ConnectionPassword=hive
> hive.exec.dynamic.partition.mode=nonstrict
> {code}
> After executing for a while, many instances of {{com.mysql.jdbc.JDBC42ResultSet}} and {{com.mysql.jdbc.StatementImpl}} accumulate, and the counts keep increasing.
> After attaching a debugger, we found that the statements are created in {{org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult}} but never closed:
> {code:java}
>   private void executeNoResult(final String queryText) throws SQLException {
>     JDOConnection jdoConn = pm.getDataStoreConnection();
>     boolean doTrace = LOG.isDebugEnabled();
>     try {
>       long start = doTrace ? System.nanoTime() : 0;
>       // The Statement created here is never closed, so each call leaks a
>       // StatementImpl (and the ResultSet the driver builds for it).
>       ((Connection)jdoConn.getNativeConnection()).createStatement().execute(queryText);
>       timingTrace(doTrace, queryText, start, doTrace ? System.nanoTime() : 0);
>     } finally {
>       jdoConn.close(); // We must release the connection before we call other pm methods.
>     }
>   }
> {code}
> The reference call stack is below; every catalog call such as {{listTables}} reaches {{executeNoResult}} through {{getDatabase}}, so one statement leaks per call:
> {code:java}
> at com.mysql.jdbc.JDBC42ResultSet.<init>(JDBC42ResultSet.java:44)
>       at sun.reflect.GeneratedConstructorAccessor14.newInstance(Unknown Source)
>       at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
>       at java.lang.reflect.Constructor.newInstance(Constructor.java:422)
>       at com.mysql.jdbc.Util.handleNewInstance(Util.java:404)
>       at com.mysql.jdbc.ResultSetImpl.getInstance(ResultSetImpl.java:319)
>       at com.mysql.jdbc.MysqlIO.buildResultSetWithUpdates(MysqlIO.java:3114)
>       at com.mysql.jdbc.MysqlIO.readResultsForQueryOrUpdate(MysqlIO.java:3014)
>       at com.mysql.jdbc.MysqlIO.readAllResults(MysqlIO.java:2280)
>       at com.mysql.jdbc.MysqlIO.sqlQueryDirect(MysqlIO.java:2673)
>       at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2546)
>       at com.mysql.jdbc.ConnectionImpl.execSQL(ConnectionImpl.java:2504)
>       at com.mysql.jdbc.StatementImpl.executeInternal(StatementImpl.java:840)
>       at com.mysql.jdbc.StatementImpl.execute(StatementImpl.java:740)
>       at com.jolbox.bonecp.StatementHandle.execute(StatementHandle.java:254)
>       at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.executeNoResult(MetaStoreDirectSql.java:233)
>       at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.doDbSpecificInitializationsBeforeQuery(MetaStoreDirectSql.java:222)
>       at org.apache.hadoop.hive.metastore.MetaStoreDirectSql.getDatabase(MetaStoreDirectSql.java:263)
>       at org.apache.hadoop.hive.metastore.ObjectStore$1.getSqlResult(ObjectStore.java:578)
>       at org.apache.hadoop.hive.metastore.ObjectStore$1.getSqlResult(ObjectStore.java:575)
>       at org.apache.hadoop.hive.metastore.ObjectStore$GetHelper.run(ObjectStore.java:2385)
>       at org.apache.hadoop.hive.metastore.ObjectStore.getDatabaseInternal(ObjectStore.java:575)
>       at org.apache.hadoop.hive.metastore.ObjectStore.getDatabase(ObjectStore.java:559)
>       at sun.reflect.GeneratedMethodAccessor60.invoke(Unknown Source)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:497)
>       at org.apache.hadoop.hive.metastore.RawStoreProxy.invoke(RawStoreProxy.java:114)
>       at com.sun.proxy.$Proxy9.getDatabase(Unknown Source)
>       at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_database_core(HiveMetaStore.java:956)
>       at org.apache.hadoop.hive.metastore.HiveMetaStore$HMSHandler.get_database(HiveMetaStore.java:930)
>       at sun.reflect.GeneratedMethodAccessor62.invoke(Unknown Source)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:497)
>       at org.apache.hadoop.hive.metastore.RetryingHMSHandler.invoke(RetryingHMSHandler.java:107)
>       at com.sun.proxy.$Proxy11.get_database(Unknown Source)
>       at org.apache.hadoop.hive.metastore.HiveMetaStoreClient.getDatabase(HiveMetaStoreClient.java:1158)
>       at sun.reflect.GeneratedMethodAccessor61.invoke(Unknown Source)
>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43)
>       at java.lang.reflect.Method.invoke(Method.java:497)
>       at org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:156)
>       at com.sun.proxy.$Proxy12.getDatabase(Unknown Source)
>       at org.apache.hadoop.hive.ql.metadata.Hive.getDatabase(Hive.java:1301)
>       at org.apache.hadoop.hive.ql.metadata.Hive.databaseExists(Hive.java:1290)
>       at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$databaseExists$1.apply$mcZ$sp(HiveClientImpl.scala:346)
>       at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$databaseExists$1.apply(HiveClientImpl.scala:346)
>       at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$databaseExists$1.apply(HiveClientImpl.scala:346)
>       at org.apache.spark.sql.hive.client.HiveClientImpl$$anonfun$withHiveState$1.apply(HiveClientImpl.scala:279)
>       at org.apache.spark.sql.hive.client.HiveClientImpl.liftedTree1$1(HiveClientImpl.scala:226)
>       at org.apache.spark.sql.hive.client.HiveClientImpl.retryLocked(HiveClientImpl.scala:225)
>       at org.apache.spark.sql.hive.client.HiveClientImpl.withHiveState(HiveClientImpl.scala:268)
>       at org.apache.spark.sql.hive.client.HiveClientImpl.databaseExists(HiveClientImpl.scala:345)
>       at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply$mcZ$sp(HiveExternalCatalog.scala:178)
>       at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:178)
>       at org.apache.spark.sql.hive.HiveExternalCatalog$$anonfun$databaseExists$1.apply(HiveExternalCatalog.scala:178)
>       at org.apache.spark.sql.hive.HiveExternalCatalog.withClient(HiveExternalCatalog.scala:97)
>       at org.apache.spark.sql.hive.HiveExternalCatalog.databaseExists(HiveExternalCatalog.scala:177)
>       at org.apache.spark.sql.catalyst.catalog.SessionCatalog.databaseExists(SessionCatalog.scala:190)
>       at org.apache.spark.sql.catalyst.catalog.SessionCatalog.org$apache$spark$sql$catalyst$catalog$SessionCatalog$$requireDbExists(SessionCatalog.scala:130)
>       at org.apache.spark.sql.catalyst.catalog.SessionCatalog.getTableMetadata(SessionCatalog.scala:288)
>       at org.apache.spark.sql.catalyst.catalog.SessionCatalog.getTempViewOrPermanentTableMetadata(SessionCatalog.scala:464)
>       at org.apache.spark.sql.internal.CatalogImpl.org$apache$spark$sql$internal$CatalogImpl$$makeTable(CatalogImpl.scala:103)
>       at org.apache.spark.sql.internal.CatalogImpl$$anonfun$2.apply(CatalogImpl.scala:98)
>       at org.apache.spark.sql.internal.CatalogImpl$$anonfun$2.apply(CatalogImpl.scala:98)
>       at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>       at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:234)
>       at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>       at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:48)
>       at scala.collection.TraversableLike$class.map(TraversableLike.scala:234)
>       at scala.collection.AbstractTraversable.map(Traversable.scala:104)
>       at org.apache.spark.sql.internal.CatalogImpl.listTables(CatalogImpl.scala:98)
> {code}


