[ https://issues.apache.org/jira/browse/HIVE-23532?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Stamatis Zampetakis updated HIVE-23532: --------------------------------------- Attachment: HIVE-23532.01.patch > NPE when fetching incomplete column statistics from the metastore > ----------------------------------------------------------------- > > Key: HIVE-23532 > URL: https://issues.apache.org/jira/browse/HIVE-23532 > Project: Hive > Issue Type: Bug > Reporter: Stamatis Zampetakis > Priority: Minor > Attachments: HIVE-23532.01.patch > > > Certain operations may store in the metastore incomplete column statistics. > Fetching those statistics back from the metastore leads to > {{NullPointerException}} . > For instance consider a column "name" of type string. If we do have > statistics for this column then the following info must be available: > * maxColLen; > * avgColLen; > * numNulls; > * numDVs; > Executing the following statement on a table with no stats updates a subset > of the statistics for this column: > {code:sql} > ALTER TABLE example UPDATE STATISTICS for column name SET ('numDVs'='242', > 'numNulls'='5'); > {code} > Fetching this kind of statistics leads to NPE that sometimes pops up in the > client and some other times is buried in the logs leading to incomplete > column stats during optimization and execution of a query. > Usually the stacktrace is similar to the one below: > {noformat} > org.apache.hadoop.hive.ql.metadata.HiveException: > java.lang.NullPointerException > at > org.apache.hadoop.hive.ql.metadata.Hive.getTableColumnStatistics(Hive.java:5251) > at > org.apache.hadoop.hive.ql.ddl.table.info.desc.DescTableOperation.getColumnDataColPathSpecified(DescTableOperation.java:216) > at > org.apache.hadoop.hive.ql.ddl.table.info.desc.DescTableOperation.execute(DescTableOperation.java:94) > at org.apache.hadoop.hive.ql.ddl.DDLTask.execute(DDLTask.java:80) > at org.apache.hadoop.hive.ql.exec.Task.executeTask(Task.java:213) > at > org.apache.hadoop.hive.ql.exec.TaskRunner.runSequential(TaskRunner.java:105) > at org.apache.hadoop.hive.ql.Executor.launchTask(Executor.java:362) > at org.apache.hadoop.hive.ql.Executor.launchTasks(Executor.java:335) > at org.apache.hadoop.hive.ql.Executor.runTasks(Executor.java:246) > at org.apache.hadoop.hive.ql.Executor.execute(Executor.java:109) > at org.apache.hadoop.hive.ql.Driver.runInternal(Driver.java:723) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:492) > at org.apache.hadoop.hive.ql.Driver.run(Driver.java:486) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:164) > at > org.apache.hadoop.hive.ql.reexec.ReExecDriver.run(ReExecDriver.java:230) > at > org.apache.hadoop.hive.cli.CliDriver.processLocalCmd(CliDriver.java:256) > at org.apache.hadoop.hive.cli.CliDriver.processCmd1(CliDriver.java:201) > at org.apache.hadoop.hive.cli.CliDriver.processCmd(CliDriver.java:127) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:422) > at org.apache.hadoop.hive.cli.CliDriver.processLine(CliDriver.java:353) > at > org.apache.hadoop.hive.ql.QTestUtil.executeClientInternal(QTestUtil.java:730) > at org.apache.hadoop.hive.ql.QTestUtil.executeClient(QTestUtil.java:700) > at > org.apache.hadoop.hive.cli.control.CoreCliDriver.runTest(CoreCliDriver.java:170) > at > org.apache.hadoop.hive.cli.control.CliAdapter.runTest(CliAdapter.java:157) > at > org.apache.hadoop.hive.cli.TestMiniLlapLocalCliDriver.testCliDriver(TestMiniLlapLocalCliDriver.java:62) > at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) > at > sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) > at > sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) > at java.lang.reflect.Method.invoke(Method.java:498) > at > org.junit.runners.model.FrameworkMethod$1.runReflectiveCall(FrameworkMethod.java:47) > at > org.junit.internal.runners.model.ReflectiveCallable.run(ReflectiveCallable.java:12) > at > org.junit.runners.model.FrameworkMethod.invokeExplosively(FrameworkMethod.java:44) > at > org.junit.internal.runners.statements.InvokeMethod.evaluate(InvokeMethod.java:17) > at > org.apache.hadoop.hive.cli.control.CliAdapter$2$1.evaluate(CliAdapter.java:135) > at org.junit.rules.RunRules.evaluate(RunRules.java:20) > at org.junit.runners.ParentRunner.runLeaf(ParentRunner.java:271) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:70) > at > org.junit.runners.BlockJUnit4ClassRunner.runChild(BlockJUnit4ClassRunner.java:50) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236) > at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229) > at org.junit.runners.ParentRunner.run(ParentRunner.java:309) > at org.junit.runners.Suite.runChild(Suite.java:127) > at org.junit.runners.Suite.runChild(Suite.java:26) > at org.junit.runners.ParentRunner$3.run(ParentRunner.java:238) > at org.junit.runners.ParentRunner$1.schedule(ParentRunner.java:63) > at org.junit.runners.ParentRunner.runChildren(ParentRunner.java:236) > at org.junit.runners.ParentRunner.access$000(ParentRunner.java:53) > at org.junit.runners.ParentRunner$2.evaluate(ParentRunner.java:229) > at > org.apache.hadoop.hive.cli.control.CliAdapter$1$1.evaluate(CliAdapter.java:95) > at org.junit.rules.RunRules.evaluate(RunRules.java:20) > at org.junit.runners.ParentRunner.run(ParentRunner.java:309) > at > org.apache.maven.surefire.junit4.JUnit4Provider.execute(JUnit4Provider.java:365) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeWithRerun(JUnit4Provider.java:273) > at > org.apache.maven.surefire.junit4.JUnit4Provider.executeTestSet(JUnit4Provider.java:238) > at > org.apache.maven.surefire.junit4.JUnit4Provider.invoke(JUnit4Provider.java:159) > at > org.apache.maven.surefire.booter.ForkedBooter.invokeProviderInSameClassLoader(ForkedBooter.java:379) > at > org.apache.maven.surefire.booter.ForkedBooter.runSuitesInProcess(ForkedBooter.java:340) > at > org.apache.maven.surefire.booter.ForkedBooter.execute(ForkedBooter.java:125) > at > org.apache.maven.surefire.booter.ForkedBooter.main(ForkedBooter.java:413) > Caused by: java.lang.NullPointerException > at > org.apache.hadoop.hive.metastore.RetryingMetaStoreClient.invoke(RetryingMetaStoreClient.java:236) > at com.sun.proxy.$Proxy62.getTableColumnStatistics(Unknown Source) > at > org.apache.hadoop.hive.ql.metadata.Hive.getTableColumnStatistics(Hive.java:5246) > ... 61 more > {noformat} -- This message was sent by Atlassian Jira (v8.3.4#803005)