[ https://issues.apache.org/jira/browse/GEODE-8536?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17234127#comment-17234127 ]

ASF subversion and git services commented on GEODE-8536:
--------------------------------------------------------

Commit 0f9d6b22ec45b9238bec268161eadeb3647ddb49 in geode's branch refs/heads/master from Donal Evans
[ https://gitbox.apache.org/repos/asf?p=geode.git;h=0f9d6b2 ]

GEODE-8536: Allow limited retries when creating Lucene IndexWriter (#5659)

Authored-by: Donal Evans <doev...@vmware.com>
(cherry picked from commit 872718ec9d119e332c328caf4493bdf8e8a83dcf)


> StackOverflow can occur when Lucene IndexWriter is unable to be created
> -----------------------------------------------------------------------
>
>                 Key: GEODE-8536
>                 URL: https://issues.apache.org/jira/browse/GEODE-8536
>             Project: Geode
>          Issue Type: Bug
>          Components: functions, lucene
>    Affects Versions: 1.12.0, 1.13.0, 1.14.0
>            Reporter: Donal Evans
>            Assignee: Donal Evans
>            Priority: Major
>              Labels: pull-request-available
>             Fix For: 1.12.1, 1.14.0, 1.13.1
>
>
> If, during a call to IndexRepositoryFactory.computeIndexRepository(), an 
> IOException is encountered when attempting to construct an IndexWriter, the 
> function retry logic will reattempt the execution. This allows transient 
> exceptions caused by concurrent modification of the fileAndChunk region to be 
> ignored and subsequent executions to succeed (see GEODE-7703). However, if 
> the IOException is thrown consistently, the function retries indefinitely, 
> which can cause a StackOverflowError:
> {noformat}
> java.lang.StackOverflowError
>         at org.apache.geode.SystemFailure.startWatchDog(SystemFailure.java:320)
>         at org.apache.geode.SystemFailure.notifyWatchDog(SystemFailure.java:758)
>         at org.apache.geode.SystemFailure.setFailure(SystemFailure.java:813)
>         at org.apache.geode.SystemFailure.initiateFailure(SystemFailure.java:790)
>         at org.apache.geode.internal.InternalDataSerializer.invokeToData(InternalDataSerializer.java:2251)
>         at org.apache.geode.internal.InternalDataSerializer.basicWriteObject(InternalDataSerializer.java:2031)
>         at org.apache.geode.DataSerializer.writeObject(DataSerializer.java:2839)
>         at org.apache.geode.internal.cache.partitioned.PartitionedRegionFunctionStreamingMessage.toData(PartitionedRegionFunctionStreamingMessage.java:192)
>         at org.apache.geode.internal.serialization.internal.DSFIDSerializerImpl.invokeToData(DSFIDSerializerImpl.java:213)
>         at org.apache.geode.internal.serialization.internal.DSFIDSerializerImpl.write(DSFIDSerializerImpl.java:137)
>         at org.apache.geode.internal.InternalDataSerializer.writeDSFID(InternalDataSerializer.java:1484)
>         at org.apache.geode.internal.tcp.MsgStreamer.writeMessage(MsgStreamer.java:247)
>         at org.apache.geode.distributed.internal.direct.DirectChannel.sendToMany(DirectChannel.java:306)
>         at org.apache.geode.distributed.internal.direct.DirectChannel.sendToOne(DirectChannel.java:182)
>         at org.apache.geode.distributed.internal.direct.DirectChannel.send(DirectChannel.java:511)
>         at org.apache.geode.distributed.internal.DistributionImpl.directChannelSend(DistributionImpl.java:346)
>         at org.apache.geode.distributed.internal.DistributionImpl.send(DistributionImpl.java:291)
>         at org.apache.geode.distributed.internal.ClusterDistributionManager.sendViaMembershipManager(ClusterDistributionManager.java:2058)
>         at org.apache.geode.distributed.internal.ClusterDistributionManager.sendOutgoing(ClusterDistributionManager.java:1986)
>         at org.apache.geode.distributed.internal.ClusterDistributionManager.sendMessage(ClusterDistributionManager.java:2023)
>         at org.apache.geode.distributed.internal.ClusterDistributionManager.putOutgoing(ClusterDistributionManager.java:1083)
>         at org.apache.geode.internal.cache.execute.PartitionedRegionFunctionResultWaiter.getPartitionedDataFrom(PartitionedRegionFunctionResultWaiter.java:89)
>         at org.apache.geode.internal.cache.PartitionedRegion.executeOnAllBuckets(PartitionedRegion.java:4079)
>         at org.apache.geode.internal.cache.PartitionedRegion.executeFunction(PartitionedRegion.java:3583)
>         at org.apache.geode.internal.cache.execute.PartitionedRegionFunctionExecutor.executeFunction(PartitionedRegionFunctionExecutor.java:220)
>         at org.apache.geode.internal.cache.execute.AbstractExecution.execute(AbstractExecution.java:376)
>         at org.apache.geode.internal.cache.execute.AbstractExecution.execute(AbstractExecution.java:359)
>         at org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResultInternal(LocalResultCollectorImpl.java:139)
>         at org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53)
>         at org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResult(LocalResultCollectorImpl.java:112)
>         at org.apache.geode.internal.cache.partitioned.PRFunctionStreamingResultCollector.getResultInternal(PRFunctionStreamingResultCollector.java:219)
>         at org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53)
>         at org.apache.geode.internal.cache.partitioned.PRFunctionStreamingResultCollector.getResult(PRFunctionStreamingResultCollector.java:88)
>         at org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResultInternal(LocalResultCollectorImpl.java:141)
>         at org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53)
>         at org.apache.geode.internal.cache.execute.LocalResultCollectorImpl.getResult(LocalResultCollectorImpl.java:112)
>         at org.apache.geode.internal.cache.partitioned.PRFunctionStreamingResultCollector.getResultInternal(PRFunctionStreamingResultCollector.java:219)
>         at org.apache.geode.internal.cache.execute.ResultCollectorHolder.getResult(ResultCollectorHolder.java:53)
> {noformat}
> The underlying exception in this case is a FileNotFoundException thrown when 
> attempting to retrieve a Lucene file from the fileAndChunk region.
> {noformat}
> [warn 2020/07/28 23:49:55.375 PDT <Pooled Waiting Message Processor 2> tid=0xab] Exception thrown while constructing Lucene Index for bucket:16 for file region:/_PR/_Bindex#_partitionedRegion.files_16
> org.apache.lucene.index.CorruptIndexException: Unexpected file read error while reading index. (resource=BufferedChecksumIndexInput(segments_4s))
> at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:290)
> at org.apache.lucene.index.IndexFileDeleter.<init>(IndexFileDeleter.java:165)
> at org.apache.lucene.index.IndexWriter.<init>(IndexWriter.java:974)
> at org.apache.geode.cache.lucene.internal.IndexRepositoryFactory.buildIndexWriter(IndexRepositoryFactory.java:152)
> at org.apache.geode.cache.lucene.internal.IndexRepositoryFactory.finishComputingRepository(IndexRepositoryFactory.java:116)
> at org.apache.geode.cache.lucene.internal.IndexRepositoryFactory.computeIndexRepository(IndexRepositoryFactory.java:65)
> at org.apache.geode.cache.lucene.internal.PartitionedRepositoryManager.computeRepository(PartitionedRepositoryManager.java:151)
> at org.apache.geode.cache.lucene.internal.PartitionedRepositoryManager.lambda$computeRepository$1(PartitionedRepositoryManager.java:170)
> at java.util.concurrent.ConcurrentHashMap.compute(ConcurrentHashMap.java:1892)
> at org.apache.geode.cache.lucene.internal.PartitionedRepositoryManager.computeRepository(PartitionedRepositoryManager.java:162)
> at org.apache.geode.cache.lucene.internal.LuceneBucketListener.lambda$afterPrimary$0(LuceneBucketListener.java:40)
> at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149)
> at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624)
> at org.apache.geode.distributed.internal.ClusterOperationExecutors.runUntilShutdown(ClusterOperationExecutors.java:442)
> at org.apache.geode.distributed.internal.ClusterOperationExecutors.doWaitingThread(ClusterOperationExecutors.java:411)
> at org.apache.geode.logging.internal.executors.LoggingThreadFactory.lambda$newThread$0(LoggingThreadFactory.java:119)
> at java.lang.Thread.run(Thread.java:748)
> Caused by: java.io.FileNotFoundException: _2p.si
> at org.apache.geode.cache.lucene.internal.filesystem.FileSystem.getFile(FileSystem.java:101)
> at org.apache.geode.cache.lucene.internal.directory.RegionDirectory.openInput(RegionDirectory.java:115)
> at org.apache.lucene.store.Directory.openChecksumInput(Directory.java:137)
> at org.apache.lucene.codecs.lucene62.Lucene62SegmentInfoFormat.read(Lucene62SegmentInfoFormat.java:89)
> at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:357)
> at org.apache.lucene.index.SegmentInfos.readCommit(SegmentInfos.java:288)
> ... 16 more
> {noformat}



--
This message was sent by Atlassian Jira
(v8.3.4#803005)

Reply via email to