[JENKINS] Lucene-Solr-BadApples-Tests-7.x - Build # 244 - Unstable
Build: https://builds.apache.org/job/Lucene-Solr-BadApples-Tests-7.x/244/ 3 tests failed. FAILED: junit.framework.TestSuite.org.apache.solr.handler.admin.AutoscalingHistoryHandlerTest Error Message: ObjectTracker found 2 object(s) that were not released!!! [SolrZkClient, ZkStateReader] org.apache.solr.common.util.ObjectReleaseTracker$ObjectTrackerException: org.apache.solr.common.cloud.SolrZkClient at org.apache.solr.common.util.ObjectReleaseTracker.track(ObjectReleaseTracker.java:42) at org.apache.solr.common.cloud.SolrZkClient.(SolrZkClient.java:203) at org.apache.solr.common.cloud.SolrZkClient.(SolrZkClient.java:126) at org.apache.solr.common.cloud.SolrZkClient.(SolrZkClient.java:116) at org.apache.solr.common.cloud.ZkStateReader.(ZkStateReader.java:306) at org.apache.solr.client.solrj.impl.ZkClientClusterStateProvider.connect(ZkClientClusterStateProvider.java:160) at org.apache.solr.client.solrj.impl.CloudSolrClient.connect(CloudSolrClient.java:399) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:827) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:950) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:997) at org.apache.solr.client.solrj.impl.CloudSolrClient.request(CloudSolrClient.java:817) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:194) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:211) at org.apache.solr.client.solrj.impl.SolrClientCloudManager.request(SolrClientCloudManager.java:115) at org.apache.solr.cloud.autoscaling.SystemLogListener.onEvent(SystemLogListener.java:118) at org.apache.solr.cloud.autoscaling.ScheduledTriggers$TriggerListeners.fireListeners(ScheduledTriggers.java:807) at org.apache.solr.cloud.autoscaling.ScheduledTriggers$TriggerListeners.fireListeners(ScheduledTriggers.java:774) at 
org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$add$4(ScheduledTriggers.java:285) at org.apache.solr.cloud.autoscaling.NodeLostTrigger.run(NodeLostTrigger.java:185) at org.apache.solr.cloud.autoscaling.ScheduledTriggers$TriggerWrapper.run(ScheduledTriggers.java:604) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.run(ScheduledThreadPoolExecutor.java:294) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1149) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:624) at java.lang.Thread.run(Thread.java:748) org.apache.solr.common.util.ObjectReleaseTracker$ObjectTrackerException: org.apache.solr.common.cloud.ZkStateReader at org.apache.solr.common.util.ObjectReleaseTracker.track(ObjectReleaseTracker.java:42) at org.apache.solr.common.cloud.ZkStateReader.(ZkStateReader.java:328) at org.apache.solr.client.solrj.impl.ZkClientClusterStateProvider.connect(ZkClientClusterStateProvider.java:160) at org.apache.solr.client.solrj.impl.CloudSolrClient.connect(CloudSolrClient.java:399) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:827) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:950) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:997) at org.apache.solr.client.solrj.impl.CloudSolrClient.request(CloudSolrClient.java:817) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:194) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:211) at org.apache.solr.client.solrj.impl.SolrClientCloudManager.request(SolrClientCloudManager.java:115) 
at org.apache.solr.cloud.autoscaling.SystemLogListener.onEvent(SystemLogListener.java:118) at org.apache.solr.cloud.autoscaling.ScheduledTriggers$TriggerListeners.fireListeners(ScheduledTriggers.java:807) at org.apache.solr.cloud.autoscaling.ScheduledTriggers$TriggerListeners.fireListeners(ScheduledTriggers.java:774) at org.apache.solr.cloud.autoscaling.ScheduledTriggers.lambda$add$4(ScheduledTriggers.java:285) at org.apache.solr.cloud.autoscaling.NodeLostTrigger.run(NodeLostTrigger.java:185) at org.apache.solr.cloud.autoscaling.ScheduledTriggers$TriggerWrapper.run(ScheduledTriggers.java:604) at java.util.concurrent.Executors$RunnableAdapter.call(Executors.java:511) at java.util.concurrent.FutureTask.runAndReset(FutureTask.java:308) at java.util.concurrent.ScheduledThreadPoolExecutor$ScheduledFutureTask.access$301(ScheduledThreadPoolExecutor.java:180) at
[JENKINS] Lucene-Solr-NightlyTests-master - Build # 1729 - Still Failing
Build: https://builds.apache.org/job/Lucene-Solr-NightlyTests-master/1729/ 6 tests failed. FAILED: org.apache.lucene.search.TestSearchAfter.testQueries Error Message: expected:<1085> but was:<1000> Stack Trace: java.lang.AssertionError: expected:<1085> but was:<1000> at __randomizedtesting.SeedInfo.seed([AADABE77DE057623:F65472ACC46CC38D]:0) at org.junit.Assert.fail(Assert.java:88) at org.junit.Assert.failNotEquals(Assert.java:834) at org.junit.Assert.assertEquals(Assert.java:645) at org.junit.Assert.assertEquals(Assert.java:631) at org.apache.lucene.search.TestSearchAfter.assertPage(TestSearchAfter.java:268) at org.apache.lucene.search.TestSearchAfter.assertQuery(TestSearchAfter.java:260) at org.apache.lucene.search.TestSearchAfter.testQueries(TestSearchAfter.java:183) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at 
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:41) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at 
org.apache.lucene.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:54) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at java.lang.Thread.run(Thread.java:748) FAILED: org.apache.solr.cloud.DeleteReplicaTest.deleteLiveReplicaTest Error Message: expected:<0> but was:<1> Stack Trace: java.lang.AssertionError: expected:<0> but was:<1> at __randomizedtesting.SeedInfo.seed([2B4E10A034E66019:862EA4AB29D9C86C]:0) at org.junit.Assert.fail(Assert.java:88) at org.junit.Assert.failNotEquals(Assert.java:834) at org.junit.Assert.assertEquals(Assert.java:645)
[JENKINS] Lucene-Solr-BadApples-master-Linux (32bit/jdk1.8.0_172) - Build # 138 - Unstable!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-BadApples-master-Linux/138/ Java: 32bit/jdk1.8.0_172 -client -XX:+UseParallelGC 1 tests failed. FAILED: org.apache.solr.handler.TestSystemCollAutoCreate.testAutoCreate Error Message: expected:<1024> but was: Stack Trace: java.lang.AssertionError: expected:<1024> but was: at __randomizedtesting.SeedInfo.seed([30E0ED7350644730:FBB59A2E61A8E5D]:0) at org.junit.Assert.fail(Assert.java:88) at org.junit.Assert.failNotEquals(Assert.java:834) at org.junit.Assert.assertEquals(Assert.java:118) at org.junit.Assert.assertEquals(Assert.java:144) at org.apache.solr.handler.TestBlobHandler.checkBlobPost(TestBlobHandler.java:106) at org.apache.solr.handler.TestSystemCollAutoCreate.testAutoCreate(TestSystemCollAutoCreate.java:26) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsFixedStatement.callStatement(BaseDistributedSearchTestCase.java:1070) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsStatement.evaluate(BaseDistributedSearchTestCase.java:1042) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at 
org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:41) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at 
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:54) at
[JENKINS] Lucene-Solr-master-MacOSX (64bit/jdk1.8.0) - Build # 4981 - Unstable!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-master-MacOSX/4981/ Java: 64bit/jdk1.8.0 -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC 3 tests failed. FAILED: org.apache.solr.cloud.autoscaling.sim.TestSimTriggerIntegration.testNodeLostTriggerRestoreState Error Message: The trigger did not fire at all Stack Trace: java.lang.AssertionError: The trigger did not fire at all at __randomizedtesting.SeedInfo.seed([E16C05018E6AFDBA:CA93D05A1412E86A]:0) at org.junit.Assert.fail(Assert.java:88) at org.junit.Assert.assertTrue(Assert.java:41) at org.apache.solr.cloud.autoscaling.sim.TestSimTriggerIntegration.testNodeLostTriggerRestoreState(TestSimTriggerIntegration.java:332) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at 
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:41) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) at 
org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:54) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at java.lang.Thread.run(Thread.java:748) FAILED: org.apache.solr.cloud.autoscaling.sim.TestSimTriggerIntegration.testNodeMarkersRegistration Error Message: Path /autoscaling/nodeAdded/127.0.0.1:10031_solr should have been deleted Stack Trace:
[JENKINS-EA] Lucene-Solr-7.x-Linux (64bit/jdk-12-ea+23) - Build # 3248 - Unstable!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-7.x-Linux/3248/ Java: 64bit/jdk-12-ea+23 -XX:+UseCompressedOops -XX:+UseConcMarkSweepGC 3 tests failed. FAILED: junit.framework.TestSuite.org.apache.solr.cloud.TestCloudSearcherWarming Error Message: 5 threads leaked from SUITE scope at org.apache.solr.cloud.TestCloudSearcherWarming: 1) Thread[id=14774, name=SyncThread:0, state=WAITING, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/jdk.internal.misc.Unsafe.park(Native Method) at java.base@12-ea/java.util.concurrent.locks.LockSupport.park(LockSupport.java:194) at java.base@12-ea/java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2081) at java.base@12-ea/java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:433) at app//org.apache.zookeeper.server.SyncRequestProcessor.run(SyncRequestProcessor.java:127) 2) Thread[id=14775, name=ProcessThread(sid:0 cport:40859):, state=WAITING, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/jdk.internal.misc.Unsafe.park(Native Method) at java.base@12-ea/java.util.concurrent.locks.LockSupport.park(LockSupport.java:194) at java.base@12-ea/java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2081) at java.base@12-ea/java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:433) at app//org.apache.zookeeper.server.PrepRequestProcessor.run(PrepRequestProcessor.java:123) 3) Thread[id=14771, name=ZkTestServer Run Thread, state=WAITING, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/java.lang.Object.wait(Native Method) at java.base@12-ea/java.lang.Thread.join(Thread.java:1308) at java.base@12-ea/java.lang.Thread.join(Thread.java:1375) at app//org.apache.zookeeper.server.NIOServerCnxnFactory.join(NIOServerCnxnFactory.java:313) at app//org.apache.solr.cloud.ZkTestServer$ZKServerMain.runFromConfig(ZkTestServer.java:343) at 
app//org.apache.solr.cloud.ZkTestServer$2.run(ZkTestServer.java:564)4) Thread[id=14773, name=SessionTracker, state=TIMED_WAITING, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/java.lang.Object.wait(Native Method) at app//org.apache.zookeeper.server.SessionTrackerImpl.run(SessionTrackerImpl.java:147) 5) Thread[id=14772, name=NIOServerCxn.Factory:0.0.0.0/0.0.0.0:0, state=RUNNABLE, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/sun.nio.ch.EPoll.wait(Native Method) at java.base@12-ea/sun.nio.ch.EPollSelectorImpl.doSelect(EPollSelectorImpl.java:120) at java.base@12-ea/sun.nio.ch.SelectorImpl.lockAndDoSelect(SelectorImpl.java:124) at java.base@12-ea/sun.nio.ch.SelectorImpl.select(SelectorImpl.java:136) at app//org.apache.zookeeper.server.NIOServerCnxnFactory.run(NIOServerCnxnFactory.java:196) at java.base@12-ea/java.lang.Thread.run(Thread.java:835) Stack Trace: com.carrotsearch.randomizedtesting.ThreadLeakError: 5 threads leaked from SUITE scope at org.apache.solr.cloud.TestCloudSearcherWarming: 1) Thread[id=14774, name=SyncThread:0, state=WAITING, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/jdk.internal.misc.Unsafe.park(Native Method) at java.base@12-ea/java.util.concurrent.locks.LockSupport.park(LockSupport.java:194) at java.base@12-ea/java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2081) at java.base@12-ea/java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:433) at app//org.apache.zookeeper.server.SyncRequestProcessor.run(SyncRequestProcessor.java:127) 2) Thread[id=14775, name=ProcessThread(sid:0 cport:40859):, state=WAITING, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/jdk.internal.misc.Unsafe.park(Native Method) at java.base@12-ea/java.util.concurrent.locks.LockSupport.park(LockSupport.java:194) at java.base@12-ea/java.util.concurrent.locks.AbstractQueuedSynchronizer$ConditionObject.await(AbstractQueuedSynchronizer.java:2081) at 
java.base@12-ea/java.util.concurrent.LinkedBlockingQueue.take(LinkedBlockingQueue.java:433) at app//org.apache.zookeeper.server.PrepRequestProcessor.run(PrepRequestProcessor.java:123) 3) Thread[id=14771, name=ZkTestServer Run Thread, state=WAITING, group=TGRP-TestCloudSearcherWarming] at java.base@12-ea/java.lang.Object.wait(Native Method) at java.base@12-ea/java.lang.Thread.join(Thread.java:1308) at java.base@12-ea/java.lang.Thread.join(Thread.java:1375) at app//org.apache.zookeeper.server.NIOServerCnxnFactory.join(NIOServerCnxnFactory.java:313) at app//org.apache.solr.cloud.ZkTestServer$ZKServerMain.runFromConfig(ZkTestServer.java:343) at
[JENKINS] Lucene-Solr-7.x-Windows (64bit/jdk-10.0.1) - Build # 919 - Unstable!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-7.x-Windows/919/ Java: 64bit/jdk-10.0.1 -XX:-UseCompressedOops -XX:+UseG1GC 4 tests failed. FAILED: org.apache.solr.handler.TestSQLHandler.doTest Error Message: --> http://127.0.0.1:56641/collection1_shard2_replica_n1:Failed to execute sqlQuery 'select id, field_i, str_s, field_i_p, field_f_p, field_d_p, field_l_p from collection1 where (text='()' OR text='') AND text='' order by field_i desc' against JDBC connection 'jdbc:calcitesolr:'. Error while executing SQL "select id, field_i, str_s, field_i_p, field_f_p, field_d_p, field_l_p from collection1 where (text='()' OR text='') AND text='' order by field_i desc": java.io.IOException: java.util.concurrent.ExecutionException: java.io.IOException: --> http://127.0.0.1:56641/collection1_shard2_replica_n1/:id{type=string,properties=indexed,stored,sortMissingLast,uninvertible} must have DocValues to use this feature. Stack Trace: java.io.IOException: --> http://127.0.0.1:56641/collection1_shard2_replica_n1:Failed to execute sqlQuery 'select id, field_i, str_s, field_i_p, field_f_p, field_d_p, field_l_p from collection1 where (text='()' OR text='') AND text='' order by field_i desc' against JDBC connection 'jdbc:calcitesolr:'. Error while executing SQL "select id, field_i, str_s, field_i_p, field_f_p, field_d_p, field_l_p from collection1 where (text='()' OR text='') AND text='' order by field_i desc": java.io.IOException: java.util.concurrent.ExecutionException: java.io.IOException: --> http://127.0.0.1:56641/collection1_shard2_replica_n1/:id{type=string,properties=indexed,stored,sortMissingLast,uninvertible} must have DocValues to use this feature. 
at __randomizedtesting.SeedInfo.seed([E10BC53F89CB6C03:464F7D9BE4707FBA]:0) at org.apache.solr.client.solrj.io.stream.SolrStream.read(SolrStream.java:215) at org.apache.solr.handler.TestSQLHandler.getTuples(TestSQLHandler.java:2617) at org.apache.solr.handler.TestSQLHandler.testBasicSelect(TestSQLHandler.java:145) at org.apache.solr.handler.TestSQLHandler.doTest(TestSQLHandler.java:93) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.base/java.lang.reflect.Method.invoke(Method.java:564) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsFixedStatement.callStatement(BaseDistributedSearchTestCase.java:1063) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsStatement.evaluate(BaseDistributedSearchTestCase.java:1035) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at 
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at
[jira] [Created] (SOLR-13091) REBALANCELEADERS is broken
Erick Erickson created SOLR-13091: - Summary: REBALANCELEADERS is broken Key: SOLR-13091 URL: https://issues.apache.org/jira/browse/SOLR-13091 Project: Solr Issue Type: Bug Security Level: Public (Default Security Level. Issues are Public) Components: SolrCloud Affects Versions: 7.6 Reporter: Erick Erickson Assignee: Erick Erickson From the user's list, the REBALANCELEADERS API doesn't seem to work correctly. Manual testing shows the problem. It's disturbing that the unit test doesn't catch this. That'll be the first thing to fix. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Assigned] (SOLR-10935) BALANCESHARDUNIQUE does not distribute properties correctly
[ https://issues.apache.org/jira/browse/SOLR-10935?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Erick Erickson reassigned SOLR-10935: - Assignee: Erick Erickson > BALANCESHARDUNIQUE does not distribute properties correctly > --- > > Key: SOLR-10935 > URL: https://issues.apache.org/jira/browse/SOLR-10935 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCloud >Affects Versions: 6.2 >Reporter: Daisy.Yuan >Assignee: Erick Erickson >Priority: Major > > Create a collection of 8 slices on 4 nodes, 2 replicas of each slice. > Node IP is: > 192.168.182.246:21101 > 192.168.182.247:21104 > 192.168.182.248:21101 > 192.168.182.249:21104 > After executing the BALANCESHARDUNIQUE command, the leader node is balanced > as follows: > Cloud Graph (* Leader) > shard1 |- * 192.168.182.248:21101 > |- 192.168.182.247:21104 > shard2 |- * 192.168.182.249:21104 > |- 192.168.182.246:21101 > shard3 |- 192.168.182.247:21104 > |- * 192.168.182.246:21101 > shard4 |- 192.168.182.248:21101 > |- * 192.168.182.249:21104 > shard5 |- 192.168.182.249:21104 > |- * 192.168.182.246:21101 > shard6 |- * 192.168.182.248:21101 > |- 192.168.182.247:21104 > shard7 |- 192.168.182.248:21101 > |- * 192.168.182.249:21104 > shard8 |- * 192.168.182.247:21104 > |- 192.168.182.246:21101 > The correct expected result should be that there are two leaders on each node. > But the actual result is.. > there are 3 leaders on 192.168.182.249:21104, > and only one Leader on 192.168.182.247:21104 > the others are distributed correctly. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Assigned] (SOLR-11998) RebalanceLeaders API broken response format with wt=JSON
[ https://issues.apache.org/jira/browse/SOLR-11998?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Erick Erickson reassigned SOLR-11998: - Assignee: Erick Erickson > RebalanceLeaders API broken response format with wt=JSON > > > Key: SOLR-11998 > URL: https://issues.apache.org/jira/browse/SOLR-11998 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCloud >Reporter: Shalin Shekhar Mangar >Assignee: Erick Erickson >Priority: Major > Fix For: 7.6, master (8.0) > > > RebalanceLeaders has a weird looking JSON output because it uses NamedList > instead of SimpleOrderedMap. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-11999) RebalanceLeaders API should not require a preferredLeader property
[ https://issues.apache.org/jira/browse/SOLR-11999?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16727163#comment-16727163 ] Erick Erickson commented on SOLR-11999: --- Are you thinking along the lines of "if a shard has no replica with preferredLeader, then choose one"? We should keep the _ability_ to designate a particular node to be leader if possible, I'm guessing this ticket would add the ability to not _require_ that that property be on a node. > RebalanceLeaders API should not require a preferredLeader property > -- > > Key: SOLR-11999 > URL: https://issues.apache.org/jira/browse/SOLR-11999 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCloud >Reporter: Shalin Shekhar Mangar >Priority: Major > Fix For: 7.6, master (8.0) > > > Rebalance leaders API requires that nodes be set with preferredLeaders > property. But in theory this is not required. We can choose replicas on > unique nodes to be leaders in the absence of the preferredLeader property. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[JENKINS] Lucene-Solr-repro - Build # 2534 - Unstable
Build: https://builds.apache.org/job/Lucene-Solr-repro/2534/ [...truncated 28 lines...] [repro] Jenkins log URL: https://builds.apache.org/job/Lucene-Solr-SmokeRelease-master/1213/consoleText [repro] Revision: 272178eff5c414e9df76e5cd4bd5bfbcf0e74249 [repro] Ant options: -DsmokeTestRelease.java9=/home/jenkins/tools/java/latest1.9 [repro] Repro line: ant test -Dtestcase=TestRecovery -Dtests.method=stressLogReplay -Dtests.seed=1F4890E2EF9636D4 -Dtests.multiplier=2 -Dtests.locale=ar-MA -Dtests.timezone=America/Chihuahua -Dtests.asserts=true -Dtests.file.encoding=ISO-8859-1 [repro] git rev-parse --abbrev-ref HEAD [repro] git rev-parse HEAD [repro] Initial local git branch/revision: 1d0a08621708dde4220f9b45886a2c147d7fefd1 [repro] git fetch [repro] git checkout 272178eff5c414e9df76e5cd4bd5bfbcf0e74249 [...truncated 2 lines...] [repro] git merge --ff-only [...truncated 1 lines...] [repro] ant clean [...truncated 6 lines...] [repro] Test suites by module: [repro]solr/core [repro] TestRecovery [repro] ant compile-test [...truncated 3592 lines...] [repro] ant test-nocompile -Dtests.dups=5 -Dtests.maxfailures=5 -Dtests.class="*.TestRecovery" -Dtests.showOutput=onerror -DsmokeTestRelease.java9=/home/jenkins/tools/java/latest1.9 -Dtests.seed=1F4890E2EF9636D4 -Dtests.multiplier=2 -Dtests.locale=ar-MA -Dtests.timezone=America/Chihuahua -Dtests.asserts=true -Dtests.file.encoding=ISO-8859-1 [...truncated 3505 lines...] [repro] Setting last failure code to 256 [repro] Failures: [repro] 1/5 failed: org.apache.solr.search.TestRecovery [repro] git checkout 1d0a08621708dde4220f9b45886a2c147d7fefd1 [...truncated 2 lines...] [repro] Exiting with code 256 [...truncated 6 lines...] - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[JENKINS] Lucene-Solr-BadApples-NightlyTests-master - Build # 42 - Still Failing
Build: https://builds.apache.org/job/Lucene-Solr-BadApples-NightlyTests-master/42/ 6 tests failed. FAILED: org.apache.solr.cloud.FullSolrCloudDistribCmdsTest.test Error Message: Java heap space Stack Trace: java.lang.OutOfMemoryError: Java heap space at java.util.Arrays.copyOf(Arrays.java:3332) at java.lang.AbstractStringBuilder.ensureCapacityInternal(AbstractStringBuilder.java:124) at java.lang.AbstractStringBuilder.append(AbstractStringBuilder.java:649) at java.lang.StringBuilder.append(StringBuilder.java:202) at org.apache.http.client.utils.URLEncodedUtils.urlEncode(URLEncodedUtils.java:539) at org.apache.http.client.utils.URLEncodedUtils.encodeFormFields(URLEncodedUtils.java:655) at org.apache.http.client.utils.URLEncodedUtils.format(URLEncodedUtils.java:403) at org.apache.http.client.utils.URLEncodedUtils.format(URLEncodedUtils.java:381) at org.apache.http.client.entity.UrlEncodedFormEntity.(UrlEncodedFormEntity.java:75) at org.apache.solr.client.solrj.impl.HttpSolrClient.fillContentStream(HttpSolrClient.java:514) at org.apache.solr.client.solrj.impl.HttpSolrClient.createMethod(HttpSolrClient.java:421) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:254) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:245) at org.apache.solr.client.solrj.impl.LBSolrClient.doRequest(LBSolrClient.java:368) at org.apache.solr.client.solrj.impl.LBSolrClient.request(LBSolrClient.java:296) at org.apache.solr.client.solrj.impl.LBHttpSolrClient.request(LBHttpSolrClient.java:213) at org.apache.solr.client.solrj.impl.CloudSolrClient.sendRequest(CloudSolrClient.java:1110) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:884) at org.apache.solr.client.solrj.impl.CloudSolrClient.request(CloudSolrClient.java:817) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:207) at org.apache.solr.client.solrj.SolrClient.query(SolrClient.java:1015) at 
org.apache.solr.client.solrj.SolrClient.query(SolrClient.java:1031) at org.apache.solr.cloud.CloudInspectUtil.compareResults(CloudInspectUtil.java:228) at org.apache.solr.cloud.CloudInspectUtil.compareResults(CloudInspectUtil.java:167) at org.apache.solr.cloud.FullSolrCloudDistribCmdsTest.testIndexingBatchPerRequestWithHttpSolrClient(FullSolrCloudDistribCmdsTest.java:669) at org.apache.solr.cloud.FullSolrCloudDistribCmdsTest.test(FullSolrCloudDistribCmdsTest.java:153) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) FAILED: org.apache.solr.cloud.RestartWhileUpdatingTest.test Error Message: There are still nodes recoverying - waited for 320 seconds Stack Trace: java.lang.AssertionError: There are still nodes recoverying - waited for 320 seconds at __randomizedtesting.SeedInfo.seed([5EB41C54616B95B7:D6E0238ECF97F84F]:0) at org.junit.Assert.fail(Assert.java:88) at org.apache.solr.cloud.AbstractDistribZkTestBase.waitForRecoveriesToFinish(AbstractDistribZkTestBase.java:195) at org.apache.solr.cloud.AbstractFullDistribZkTestBase.waitForRecoveriesToFinish(AbstractFullDistribZkTestBase.java:1038) at org.apache.solr.cloud.AbstractFullDistribZkTestBase.waitForThingsToLevelOut(AbstractFullDistribZkTestBase.java:1595) at org.apache.solr.cloud.RestartWhileUpdatingTest.test(RestartWhileUpdatingTest.java:143) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at 
java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsFixedStatement.callStatement(BaseDistributedSearchTestCase.java:1070) at
[jira] [Comment Edited] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16727120#comment-16727120 ] Joel Bernstein edited comment on SOLR-13088 at 12/21/18 10:55 PM: -- Full power of Math Expressions inside of Zeppelin. zplot formats the output so Zeppelin can plot. Here is a sample expression: {code:java} let(r=random(collection1, q="*:*", fl="response_d, filesize_d", rows="500"), x=col(r, filesize_d), y=col(r, response_d), s=pairSort(x,y), zplot(x=rowAt(s, 0), y=rowAt(s,1))){code} The example above takes a random sample, extracts two vectors, pair sorts the vectors and then formats the vectors for output with zplot. I'll attach a screenshot of the result (Screen Shot 2018-12-21 at 5.53.18 PM.png). was (Author: joel.bernstein): Full power of Math Expressions inside of Zeppelin. zplot formats the output so Zeppelin can plot. Here is a sample expression: {code:java} let(r=random(collection1, q="*:*", fl="response_d, filesize_d", rows="500"), x=col(r, filesize_d), y=col(r, response_d), s=pairSort(x,y), zplot(x=rowAt(s, 0), y=rowAt(s,1))){code} The example above takes a random sample, extracts to vectors, pair sorts the vectors and then formats the vectors for output with zplot. I'll attach a screenshot of the result (Screen Shot 2018-12-21 at 5.53.18 PM.png). > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch, Screen Shot 2018-12-21 at 5.53.18 > PM.png > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. 
> The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-13088: -- Attachment: (was: Screen Shot 2018-12-21 at 3.01.55 PM.png) > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch, Screen Shot 2018-12-21 at 5.53.18 > PM.png > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Comment Edited] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16727120#comment-16727120 ] Joel Bernstein edited comment on SOLR-13088 at 12/21/18 10:54 PM: -- Full power of Math Expressions inside of Zeppelin. zplot formats the output so Zeppelin can plot. Here is a sample expression: {code:java} let(r=random(collection1, q="*:*", fl="response_d, filesize_d", rows="500"), x=col(r, filesize_d), y=col(r, response_d), s=pairSort(x,y), zplot(x=rowAt(s, 0), y=rowAt(s,1))){code} The example above takes a random sample, extracts to vectors, pair sorts the vectors and then formats the vectors for output with zplot. I'll attach a screenshot of the result (Screen Shot 2018-12-21 at 5.53.18 PM.png). was (Author: joel.bernstein): Full power of Math Expressions inside of Zeppelin. zplot formats the output so Zeppelin can plot. Here is a sample expression: {code:java} let(r=random(collection1, q="*:*", fl="response_d, filesize_d", rows="500"), x=col(r, filesize_d), y=col(r, response_d), s=pairSort(x,y), zplot(x=rowAt(s, 0), y=rowAt(s,1))){code} The example above takes a random sample, extracts to vectors, pair sorts the vectors and then formats the vectors for output with zplot. I'll attach a screenshot of the result. > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch, Screen Shot 2018-12-21 at 3.01.55 > PM.png, Screen Shot 2018-12-21 at 5.53.18 PM.png > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. 
> The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16727120#comment-16727120 ] Joel Bernstein commented on SOLR-13088: --- Full power of Math Expressions inside of Zeppelin. zplot formats the output so Zeppelin can plot. Here is a sample expression: {code:java} let(r=random(collection1, q="*:*", fl="response_d, filesize_d", rows="500"), x=col(r, filesize_d), y=col(r, response_d), s=pairSort(x,y), zplot(x=rowAt(s, 0), y=rowAt(s,1))){code} The example above takes a random sample, extracts to vectors, pair sorts the vectors and then formats the vectors for output with zplot. I'll attach a screenshot of the result. > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch, Screen Shot 2018-12-21 at 3.01.55 > PM.png, Screen Shot 2018-12-21 at 5.53.18 PM.png > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-13088: -- Attachment: Screen Shot 2018-12-21 at 5.53.18 PM.png > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch, Screen Shot 2018-12-21 at 3.01.55 > PM.png, Screen Shot 2018-12-21 at 5.53.18 PM.png > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16727093#comment-16727093 ] Eric Pugh commented on SOLR-13088: -- Reading through this, is the idea that I could cut'n'paste a Streaming expression from the Solr Admin panel, paste it into Zeppelin, and get a great visualization? Or do I need to tweak my streaming expression to incorporate the `zplot`? What I want is to take my streaming expression, and get a great visualization in Zeppelin.. Which might mean more logic in the Zeppelin visualization side? > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch, Screen Shot 2018-12-21 at 3.01.55 > PM.png > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-13088: -- Attachment: Screen Shot 2018-12-21 at 3.01.55 PM.png > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch, Screen Shot 2018-12-21 at 3.01.55 > PM.png > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Resolved] (SOLR-13080) TermsQParserPlugin automaton method fails to sort input
[ https://issues.apache.org/jira/browse/SOLR-13080?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] David Smiley resolved SOLR-13080. - Resolution: Fixed Fix Version/s: 7.7 > TermsQParserPlugin automaton method fails to sort input > --- > > Key: SOLR-13080 > URL: https://issues.apache.org/jira/browse/SOLR-13080 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Affects Versions: 7.5 >Reporter: Daniel Lowe >Assignee: David Smiley >Priority: Minor > Fix For: 7.7 > > Attachments: SOLR-13080.patch > > > The contract for Automata.makeStringUnion is that the input is sorted. As > BytesRef implements Comparable. The simplest fix would probably be to make > Arrays.sort(bytesRefs); > The first line of automaton's makeFilter method in TermsQParserPlugin. > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-13080) TermsQParserPlugin automaton method fails to sort input
[ https://issues.apache.org/jira/browse/SOLR-13080?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726961#comment-16726961 ] ASF subversion and git services commented on SOLR-13080: Commit bcf1a4eaf8d3b945f11833c9dfed4a17a6d6ac58 in lucene-solr's branch refs/heads/branch_7x from David Wayne Smiley [ https://git-wip-us.apache.org/repos/asf?p=lucene-solr.git;h=bcf1a4e ] SOLR-13080: TermsQParserPlugin automaton method should (must?) sort input (cherry picked from commit 1d0a08621708dde4220f9b45886a2c147d7fefd1) > TermsQParserPlugin automaton method fails to sort input > --- > > Key: SOLR-13080 > URL: https://issues.apache.org/jira/browse/SOLR-13080 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Affects Versions: 7.5 >Reporter: Daniel Lowe >Assignee: David Smiley >Priority: Minor > Attachments: SOLR-13080.patch > > > The contract for Automata.makeStringUnion is that the input is sorted. As > BytesRef implements Comparable. The simplest fix would probably be to make > Arrays.sort(bytesRefs); > The first line of automaton's makeFilter method in TermsQParserPlugin. > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-13080) TermsQParserPlugin automaton method fails to sort input
[ https://issues.apache.org/jira/browse/SOLR-13080?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726959#comment-16726959 ] ASF subversion and git services commented on SOLR-13080: Commit 1d0a08621708dde4220f9b45886a2c147d7fefd1 in lucene-solr's branch refs/heads/master from David Wayne Smiley [ https://git-wip-us.apache.org/repos/asf?p=lucene-solr.git;h=1d0a086 ] SOLR-13080: TermsQParserPlugin automaton method should (must?) sort input > TermsQParserPlugin automaton method fails to sort input > --- > > Key: SOLR-13080 > URL: https://issues.apache.org/jira/browse/SOLR-13080 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Affects Versions: 7.5 >Reporter: Daniel Lowe >Assignee: David Smiley >Priority: Minor > Attachments: SOLR-13080.patch > > > The contract for Automata.makeStringUnion is that the input is sorted. As > BytesRef implements Comparable. The simplest fix would probably be to make > Arrays.sort(bytesRefs); > The first line of automaton's makeFilter method in TermsQParserPlugin. > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243646386 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr issue #525: LUCENE-8585: Index-time jump-tables for DocValues
Github user jpountz commented on the issue: https://github.com/apache/lucene-solr/pull/525 I forgot to mention: precommit complains about unused imports. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243649022 --- Diff: lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java --- @@ -1204,6 +1204,12 @@ public void testRandomSortedBytes() throws IOException { } private void doTestNumericsVsStoredFields(double density, LongSupplier longs) throws Exception { +doTestNumericsVsStoredFields(density, longs, 256); +// TODO: 20 docs are needed to test jumps properly (see LUCENE-8585), but that is quite slow (few minutes). +// Maybe it can be nightly? +//doTestNumericsVsStoredFields(density, longs, 20); --- End diff -- By the way if we want to test such large numbers of documents, we should avoid RandomIndexWriter (use IndexWriter instead) and set reasonable values of maxBufferedDocs+RAMSizeBufferMB+mergePolicy instead of relying on random values that are set by LuceneTestCase#newIndexWriterConfig and make indexing slow (but are useful to increase coverage of flushing+merging) --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243647937 --- Diff: lucene/core/src/test/org/apache/lucene/index/TestDocValues.java --- @@ -123,8 +124,72 @@ public void testNumericField() throws Exception { iw.close(); dir.close(); } - - /** + + // The LUCENE-8585 jump-tables enables O(1) skipping of IndexedDISI blocks, DENSE block lookup + // and numeric multi blocks. This test focuses on testing these jumps. + @Slow + public void testNumericFieldJumpTables() throws Exception { +Directory dir = newDirectory(); +IndexWriter iw = new IndexWriter(dir, newIndexWriterConfig(null)); +final int maxDoc = atLeast(3*65536); // Must be above 3*65536 to trigger IndexedDISI block skips --- End diff -- Why do we have this test here as opposed to TestLucene80DocValuesFormat? Given that you are indexing so many documents and don't care about testing indexing as much as you care about checking iterators, you should make sure that the index writer config isn't too crazy and enforce sane values of maxBufferedDocs, RAMBufferSizeMB, and a sane merge policy (eg. not the alcoholic one). See for instance TestLucene70DocValuesFormat#doTestSortedNumericBlocksOfVariousBitsPerValue. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243644347 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java --- @@ -0,0 +1,1430 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene80; + +import java.io.Closeable; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.apache.lucene.util.packed.DirectReader; + +/** reader for {@link Lucene80DocValuesFormat} */ +final class Lucene80DocValuesProducer extends DocValuesProducer implements Closeable { + private final Map numerics = new HashMap<>(); + private final Map binaries = new HashMap<>(); + private final Map sorted = new HashMap<>(); + private final Map sortedSets = new HashMap<>(); + private final Map sortedNumerics = new HashMap<>(); + private long ramBytesUsed; + private final IndexInput data; + private final int maxDoc; + + /** expert: instantiates a new reader */ + Lucene80DocValuesProducer(SegmentReadState state, String 
dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { +String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); +this.maxDoc = state.segmentInfo.maxDoc(); +ramBytesUsed = RamUsageEstimator.shallowSizeOfInstance(getClass()); + +int version = -1; + +// read in the entries from the metadata file. +try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) { + Throwable priorE = null; + try { +version = CodecUtil.checkIndexHeader(in, metaCodec, + Lucene80DocValuesFormat.VERSION_START, + Lucene80DocValuesFormat.VERSION_CURRENT, +state.segmentInfo.getId(), +state.segmentSuffix); +readFields(in, state.fieldInfos); + } catch (Throwable exception) { +priorE = exception; + } finally { +CodecUtil.checkFooter(in, priorE); + } +} + +String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); +this.data = state.directory.openInput(dataName, state.context); +boolean success = false; +try { + final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, + Lucene80DocValuesFormat.VERSION_START, +
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243644248 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java --- @@ -0,0 +1,1430 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene80; + +import java.io.Closeable; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.apache.lucene.util.packed.DirectReader; + +/** reader for {@link Lucene80DocValuesFormat} */ +final class Lucene80DocValuesProducer extends DocValuesProducer implements Closeable { + private final Map numerics = new HashMap<>(); + private final Map binaries = new HashMap<>(); + private final Map sorted = new HashMap<>(); + private final Map sortedSets = new HashMap<>(); + private final Map sortedNumerics = new HashMap<>(); + private long ramBytesUsed; + private final IndexInput data; + private final int maxDoc; + + /** expert: instantiates a new reader */ + Lucene80DocValuesProducer(SegmentReadState state, String 
dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { +String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); +this.maxDoc = state.segmentInfo.maxDoc(); +ramBytesUsed = RamUsageEstimator.shallowSizeOfInstance(getClass()); + +int version = -1; + +// read in the entries from the metadata file. +try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) { + Throwable priorE = null; + try { +version = CodecUtil.checkIndexHeader(in, metaCodec, + Lucene80DocValuesFormat.VERSION_START, + Lucene80DocValuesFormat.VERSION_CURRENT, +state.segmentInfo.getId(), +state.segmentSuffix); +readFields(in, state.fieldInfos); + } catch (Throwable exception) { +priorE = exception; + } finally { +CodecUtil.checkFooter(in, priorE); + } +} + +String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); +this.data = state.directory.openInput(dataName, state.context); +boolean success = false; +try { + final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, + Lucene80DocValuesFormat.VERSION_START, +
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243642604 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesConsumer.java --- @@ -0,0 +1,663 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene80; + + +import java.io.Closeable; +import java.io.IOException; +import java.util.Arrays; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesConsumer; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.EmptyDocValuesProducer; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.SortedSetSelector; +import org.apache.lucene.store.GrowableByteArrayDataOutput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RAMOutputStream; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.BytesRefBuilder; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.MathUtil; +import org.apache.lucene.util.StringHelper; +import org.apache.lucene.util.packed.DirectMonotonicWriter; +import org.apache.lucene.util.packed.DirectWriter; + +import static org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat.DIRECT_MONOTONIC_BLOCK_SHIFT; +import static org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat.NUMERIC_BLOCK_SHIFT; +import static org.apache.lucene.codecs.lucene80.Lucene80DocValuesFormat.NUMERIC_BLOCK_SIZE; + +/** writer for {@link Lucene80DocValuesFormat} */ +final class Lucene80DocValuesConsumer extends DocValuesConsumer implements Closeable { + + IndexOutput data, meta; + final int maxDoc; + + /** expert: 
Creates a new writer */ + public Lucene80DocValuesConsumer(SegmentWriteState state, String dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { +boolean success = false; +try { + String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); + data = state.directory.createOutput(dataName, state.context); + CodecUtil.writeIndexHeader(data, dataCodec, Lucene80DocValuesFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); + String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); + meta = state.directory.createOutput(metaName, state.context); + CodecUtil.writeIndexHeader(meta, metaCodec, Lucene80DocValuesFormat.VERSION_CURRENT, state.segmentInfo.getId(), state.segmentSuffix); + maxDoc = state.segmentInfo.maxDoc(); + success = true; +} finally { + if (!success) { +IOUtils.closeWhileHandlingException(this); + } +} + } + + @Override + public void close() throws IOException { +boolean success = false; +try { + if (meta != null) { +meta.writeInt(-1); // write EOF marker +CodecUtil.writeFooter(meta); // write checksum + } + if (data != null) { +CodecUtil.writeFooter(data); // write checksum + } + success = true; +} finally { + if (success) { +IOUtils.close(data, meta); + } else {
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243643855 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/Lucene80DocValuesProducer.java --- @@ -0,0 +1,1430 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.codecs.lucene80; + +import java.io.Closeable; +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import org.apache.lucene.codecs.CodecUtil; +import org.apache.lucene.codecs.DocValuesProducer; +import org.apache.lucene.index.BinaryDocValues; +import org.apache.lucene.index.CorruptIndexException; +import org.apache.lucene.index.DocValues; +import org.apache.lucene.index.FieldInfo; +import org.apache.lucene.index.FieldInfos; +import org.apache.lucene.index.ImpactsEnum; +import org.apache.lucene.index.IndexFileNames; +import org.apache.lucene.index.NumericDocValues; +import org.apache.lucene.index.PostingsEnum; +import org.apache.lucene.index.SegmentReadState; +import org.apache.lucene.index.SortedDocValues; +import org.apache.lucene.index.SortedNumericDocValues; +import org.apache.lucene.index.SortedSetDocValues; +import org.apache.lucene.index.TermsEnum; +import org.apache.lucene.index.TermsEnum.SeekStatus; +import org.apache.lucene.store.ChecksumIndexInput; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.BytesRef; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.LongValues; +import org.apache.lucene.util.RamUsageEstimator; +import org.apache.lucene.util.packed.DirectMonotonicReader; +import org.apache.lucene.util.packed.DirectReader; + +/** reader for {@link Lucene80DocValuesFormat} */ +final class Lucene80DocValuesProducer extends DocValuesProducer implements Closeable { + private final Map numerics = new HashMap<>(); + private final Map binaries = new HashMap<>(); + private final Map sorted = new HashMap<>(); + private final Map sortedSets = new HashMap<>(); + private final Map sortedNumerics = new HashMap<>(); + private long ramBytesUsed; + private final IndexInput data; + private final int maxDoc; + + /** expert: instantiates a new reader */ + Lucene80DocValuesProducer(SegmentReadState state, String 
dataCodec, String dataExtension, String metaCodec, String metaExtension) throws IOException { +String metaName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, metaExtension); +this.maxDoc = state.segmentInfo.maxDoc(); +ramBytesUsed = RamUsageEstimator.shallowSizeOfInstance(getClass()); + +int version = -1; + +// read in the entries from the metadata file. +try (ChecksumIndexInput in = state.directory.openChecksumInput(metaName, state.context)) { + Throwable priorE = null; + try { +version = CodecUtil.checkIndexHeader(in, metaCodec, + Lucene80DocValuesFormat.VERSION_START, + Lucene80DocValuesFormat.VERSION_CURRENT, +state.segmentInfo.getId(), +state.segmentSuffix); +readFields(in, state.fieldInfos); + } catch (Throwable exception) { +priorE = exception; + } finally { +CodecUtil.checkFooter(in, priorE); + } +} + +String dataName = IndexFileNames.segmentFileName(state.segmentInfo.name, state.segmentSuffix, dataExtension); +this.data = state.directory.openInput(dataName, state.context); +boolean success = false; +try { + final int version2 = CodecUtil.checkIndexHeader(data, dataCodec, + Lucene80DocValuesFormat.VERSION_START, +
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243638367 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243641644 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243648602 --- Diff: lucene/test-framework/src/java/org/apache/lucene/index/BaseDocValuesFormatTestCase.java --- @@ -1204,6 +1204,12 @@ public void testRandomSortedBytes() throws IOException { } private void doTestNumericsVsStoredFields(double density, LongSupplier longs) throws Exception { +doTestNumericsVsStoredFields(density, longs, 256); +// TODO: 20 docs are needed to test jumps properly (see LUCENE-8585), but that is quite slow (few minutes). +// Maybe it can be nightly? +//doTestNumericsVsStoredFields(density, longs, 20); --- End diff -- Let's not do that in the base test case as this will be too slow with some codecs such as the SimpleTextCodec. What we have done before for formats that have branches that only get triggered with many documents is that we added tests to their own test file, ie. TestLucene80DocValuesFormat. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243630309 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243640446 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243630434 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,536 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: * A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits holds the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 32 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure for a given DENSE block is stored at the beginning of the
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243629404 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243636154 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243634870 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243625997 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. --- End diff -- I think you mean "not exceed 2^17 _bytes_"? 
Then maybe we should also express the maximum size of a block in bytes to make it easy to compare, ie. "2^13 + 36 bytes"? Should we simplify the encoding so that the long is actualy two 32-bit ints rather than one of 31 bits and another one of 33 bits? --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243624209 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure --- End diff -- I'm not sure to understand this sentence, did you mean to write another word than "blockCache" here? --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243631028 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired of + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[GitHub] lucene-solr pull request #525: LUCENE-8585: Index-time jump-tables for DocVa...
Github user jpountz commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/525#discussion_r243628701 --- Diff: lucene/core/src/java/org/apache/lucene/codecs/lucene80/IndexedDISI.java --- @@ -0,0 +1,542 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene80; + +import java.io.DataInput; +import java.io.IOException; + +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.RandomAccessInput; +import org.apache.lucene.util.ArrayUtil; +import org.apache.lucene.util.BitSetIterator; +import org.apache.lucene.util.FixedBitSet; +import org.apache.lucene.util.RoaringDocIdSet; + +/** + * Disk-based implementation of a {@link DocIdSetIterator} which can return + * the index of the current document, i.e. the ordinal of the current document + * among the list of documents that this iterator can return. This is useful + * to implement sparse doc values by only having to encode values for documents + * that actually have a value. 
+ * Implementation-wise, this {@link DocIdSetIterator} is inspired by + * {@link RoaringDocIdSet roaring bitmaps} and encodes ranges of {@code 65536} + * documents independently and picks between 3 encodings depending on the + * density of the range: + * {@code ALL} if the range contains 65536 documents exactly, + * {@code DENSE} if the range contains 4096 documents or more; in that + * case documents are stored in a bit set, + * {@code SPARSE} otherwise, and the lower 16 bits of the doc IDs are + * stored in a {@link DataInput#readShort() short}. + * + * Only ranges that contain at least one value are encoded. + * This implementation uses 6 bytes per document in the worst-case, which happens + * in the case that all ranges contain exactly one document. + * + * + * To avoid O(n) lookup time complexity, with n being the number of documents, two lookup + * tables are used: A lookup table for block blockCache and index, and a rank structure + * for DENSE block lookups. + * + * The lookup table is an array of {@code long}s with an entry for each block. It allows for + * direct jumping to the block, as opposed to iteration from the current position and forward + * one block at a time. + * + * Each long entry consists of 2 logical parts: + * + * The first 31 bits hold the index (number of set bits in the blocks) up to just before the + * wanted block. The next 33 bits holds the offset in bytes into the underlying slice. + * As there is a maximum of 2^16 blocks, it follows that the maximum size of any block must + * not exceed 2^17 bits to avoid overflow. This is currently the case, with the largest + * block being DENSE and using 2^16 + 288 bits, and is likely to continue to hold as using + * more than double the amount of bits is unlikely to be an efficient representation. + * The cache overhead is numDocs/1024 bytes. + * + * Note: There are 4 types of blocks: ALL, DENSE, SPARSE and non-existing (0 set bits). 
+ * In the case of non-existing blocks, the entry in the lookup table has index equal to the + * previous entry and offset equal to the next non-empty block. + * + * The block lookup table is stored at the end of the total block structure. + * + * + * The rank structure for DENSE blocks is an array of unsigned {@code short}s with an entry + * for each sub-block of 512 bits out of the 65536 bits in the outer DENSE block. + * + * Each rank-entry states the number of set bits within the block up to the bit before the + * bit positioned at the start of the sub-block. + * Note that the rank entry of the first sub-block is always 0 and that the last entry can + * at most be 65536-512 = 65024 and thus will always fit into an unsigned short. + * + * The rank structure
[jira] [Commented] (SOLR-13037) Harden TestSimGenericDistributedQueue.
[ https://issues.apache.org/jira/browse/SOLR-13037?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726913#comment-16726913 ] Jason Gerlowski commented on SOLR-13037: fucit.org hasn't shown any {{branch_7x}} or {{master}} failures for this test since the fix went in last week. So I'm going to mark this as closed. (There are a few branch_7_6 failures, which makes sense since the fix hasn't gone to that branch. I'm happy to add the fix to that branch as well if anyone wants it, but my understanding is that we don't normally do this unless the fix is for a production-bug. It might make it marginally easier for anyone cutting a theoretical 7.6.1 to get passing builds, which was apparently a serious problem with 7.6. So I've got mixed feelings, but will hold off for now.) > Harden TestSimGenericDistributedQueue. > -- > > Key: SOLR-13037 > URL: https://issues.apache.org/jira/browse/SOLR-13037 > Project: Solr > Issue Type: Sub-task > Security Level: Public(Default Security Level. Issues are Public) > Components: Tests >Reporter: Mark Miller >Assignee: Jason Gerlowski >Priority: Major > Fix For: master (8.0), 7.7 > > Attachments: SOLR-13037.patch, repro-log.txt > > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Resolved] (SOLR-13037) Harden TestSimGenericDistributedQueue.
[ https://issues.apache.org/jira/browse/SOLR-13037?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jason Gerlowski resolved SOLR-13037. Resolution: Fixed Fix Version/s: 7.7 master (8.0) > Harden TestSimGenericDistributedQueue. > -- > > Key: SOLR-13037 > URL: https://issues.apache.org/jira/browse/SOLR-13037 > Project: Solr > Issue Type: Sub-task > Security Level: Public(Default Security Level. Issues are Public) > Components: Tests >Reporter: Mark Miller >Assignee: Jason Gerlowski >Priority: Major > Fix For: master (8.0), 7.7 > > Attachments: SOLR-13037.patch, repro-log.txt > > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-11086) Grouping by TrieDateField and DatePointField fails
[ https://issues.apache.org/jira/browse/SOLR-11086?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726891#comment-16726891 ] Lorenzo Farnararo commented on SOLR-11086: -- this is still present in the latest stable version 7.6.0 > Grouping by TrieDateField and DatePointField fails > -- > > Key: SOLR-11086 > URL: https://issues.apache.org/jira/browse/SOLR-11086 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: search >Affects Versions: 6.6, 7.0.1 >Reporter: Vitaly Lavrov >Priority: Minor > > Whenever grouping is done by either TrieDateField or DatePointField in > SolrCloud the request fails with the following error: > {code:java} > ERROR - 2017-07-14 13:29:29.587; [c:offers s:shard1 r:core_node1 > x:offers_shard1_replica1] org.apache.solr.common.SolrException; > org.apache.solr.common.SolrException: Invalid Date String:'Fri Feb 16 > 00:00:00 UTC 2018' > at > org.apache.solr.util.DateMathParser.parseMath(DateMathParser.java:234) > at > org.apache.solr.schema.DatePointField.toNativeType(DatePointField.java:118) > at > org.apache.solr.schema.DatePointField.readableToIndexed(DatePointField.java:187) > at > org.apache.solr.search.grouping.distributed.command.GroupConverter.fromMutable(GroupConverter.java:57) > at > org.apache.solr.search.grouping.distributed.command.SearchGroupsFieldCommand.result(SearchGroupsFieldCommand.java:124) > at > org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer.transform(SearchGroupsResultTransformer.java:58) > at > org.apache.solr.search.grouping.distributed.shardresultserializer.SearchGroupsResultTransformer.transform(SearchGroupsResultTransformer.java:37) > at > org.apache.solr.search.grouping.CommandHandler.processResult(CommandHandler.java:206) > at > org.apache.solr.handler.component.QueryComponent.process(QueryComponent.java:420) > at > 
org.apache.solr.handler.component.SearchHandler.handleRequestBody(SearchHandler.java:296) > at > org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:173) > at org.apache.solr.core.SolrCore.execute(SolrCore.java:2477) > at org.apache.solr.servlet.HttpSolrCall.execute(HttpSolrCall.java:723) > at org.apache.solr.servlet.HttpSolrCall.call(HttpSolrCall.java:529) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:361) > at > org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:305) > at > org.eclipse.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1691) > at > org.eclipse.jetty.servlet.ServletHandler.doHandle(ServletHandler.java:582) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:143) > at > org.eclipse.jetty.security.SecurityHandler.handle(SecurityHandler.java:548) > at > org.eclipse.jetty.server.session.SessionHandler.doHandle(SessionHandler.java:226) > at > org.eclipse.jetty.server.handler.ContextHandler.doHandle(ContextHandler.java:1180) > at > org.eclipse.jetty.servlet.ServletHandler.doScope(ServletHandler.java:512) > at > org.eclipse.jetty.server.session.SessionHandler.doScope(SessionHandler.java:185) > at > org.eclipse.jetty.server.handler.ContextHandler.doScope(ContextHandler.java:1112) > at > org.eclipse.jetty.server.handler.ScopedHandler.handle(ScopedHandler.java:141) > at > org.eclipse.jetty.server.handler.ContextHandlerCollection.handle(ContextHandlerCollection.java:213) > at > org.eclipse.jetty.server.handler.HandlerCollection.handle(HandlerCollection.java:119) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at > org.eclipse.jetty.rewrite.handler.RewriteHandler.handle(RewriteHandler.java:335) > at > org.eclipse.jetty.server.handler.HandlerWrapper.handle(HandlerWrapper.java:134) > at org.eclipse.jetty.server.Server.handle(Server.java:534) > at 
org.eclipse.jetty.server.HttpChannel.handle(HttpChannel.java:320) > at > org.eclipse.jetty.server.HttpConnection.onFillable(HttpConnection.java:251) > at > org.eclipse.jetty.io.AbstractConnection$ReadCallback.succeeded(AbstractConnection.java:273) > at org.eclipse.jetty.io.FillInterest.fillable(FillInterest.java:95) > at > org.eclipse.jetty.io.SelectChannelEndPoint$2.run(SelectChannelEndPoint.java:93) > at > org.eclipse.jetty.util.thread.strategy.ExecuteProduceConsume.executeProduceConsume(ExecuteProduceConsume.java:303) > at >
[jira] [Assigned] (SOLR-13090) Make maxBooleanClauses support system-property override
[ https://issues.apache.org/jira/browse/SOLR-13090?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Jason Gerlowski reassigned SOLR-13090: -- Assignee: Jason Gerlowski > Make maxBooleanClauses support system-property override > --- > > Key: SOLR-13090 > URL: https://issues.apache.org/jira/browse/SOLR-13090 > Project: Solr > Issue Type: Improvement > Security Level: Public(Default Security Level. Issues are Public) >Affects Versions: master (8.0), 7.7 >Reporter: Jason Gerlowski >Assignee: Jason Gerlowski >Priority: Minor > > Currently, the {{maxBooleanClauses}} property is specified in most > solrconfig's as the hardcoded value "1024". It'd be nice if we changed our > shipped configs so that they instead specified it as > {{${solr.max.booleanClauses:1024} This would maintain the current OOTB behavior (maxBooleanClauses would still > default to 1024) while adding the ability to update maxBooleanClauses values > across the board much more easily. (I see users want to do this often when > they first run up against this limit.) -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[JENKINS] Lucene-Solr-Tests-master - Build # 3078 - Still Unstable
Build: https://builds.apache.org/job/Lucene-Solr-Tests-master/3078/ 1 tests failed. FAILED: org.apache.solr.security.BasicAuthIntegrationTest.testBasicAuth Error Message: Error from server at https://127.0.0.1:33667/solr/authCollection: Error from server at null: Expected mime type application/octet-stream but got text/html. Error 401 require authentication HTTP ERROR 401 Problem accessing /solr/authCollection_shard3_replica_n4/select. Reason: require authenticationhttp://eclipse.org/jetty;>Powered by Jetty:// 9.4.14.v20181114 Stack Trace: org.apache.solr.client.solrj.impl.HttpSolrClient$RemoteSolrException: Error from server at https://127.0.0.1:33667/solr/authCollection: Error from server at null: Expected mime type application/octet-stream but got text/html. Error 401 require authentication HTTP ERROR 401 Problem accessing /solr/authCollection_shard3_replica_n4/select. Reason: require authenticationhttp://eclipse.org/jetty;>Powered by Jetty:// 9.4.14.v20181114 at __randomizedtesting.SeedInfo.seed([8FA017AD6D110878:33CE61BFC9428B02]:0) at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:650) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:256) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:245) at org.apache.solr.client.solrj.impl.LBSolrClient.doRequest(LBSolrClient.java:368) at org.apache.solr.client.solrj.impl.LBSolrClient.request(LBSolrClient.java:296) at org.apache.solr.client.solrj.impl.LBHttpSolrClient.request(LBHttpSolrClient.java:213) at org.apache.solr.client.solrj.impl.CloudSolrClient.sendRequest(CloudSolrClient.java:1110) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:884) at org.apache.solr.client.solrj.impl.CloudSolrClient.request(CloudSolrClient.java:817) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:207) at 
org.apache.solr.security.BasicAuthIntegrationTest.executeQuery(BasicAuthIntegrationTest.java:315) at org.apache.solr.security.BasicAuthIntegrationTest.testBasicAuth(BasicAuthIntegrationTest.java:288) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at 
com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at
[GitHub] lucene-solr pull request #528: SOLR-12955 2
Github user barrotsteindev commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/528#discussion_r243628041 --- Diff: solr/core/src/java/org/apache/solr/update/processor/DistributedZkUpdateProcessor.java --- @@ -425,4 +448,10 @@ void setupRequest(UpdateCommand cmd) { nodes = setupRequest(dcmd.getId(), null); } } + + @Override + protected boolean shouldCloneCmdDoc() { +boolean willDistrib = isLeader && nodes != null && nodes.size() > 0; --- End diff -- This could probably optimized, since cloneRequiredOnLeader is already set during construction of the instance: `if(!cloneRequiredOnLeader) { return false; } // will distrib command return isLeader && nodes != null && nodes.size() > 0;` --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (LUCENE-8622) Add a MinimumShouldMatch interval iterator
[ https://issues.apache.org/jira/browse/LUCENE-8622?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Alan Woodward updated LUCENE-8622: -- Attachment: LUCENE-8622.patch > Add a MinimumShouldMatch interval iterator > -- > > Key: LUCENE-8622 > URL: https://issues.apache.org/jira/browse/LUCENE-8622 > Project: Lucene - Core > Issue Type: Task >Reporter: Alan Woodward >Assignee: Alan Woodward >Priority: Major > Attachments: LUCENE-8622.patch > > > It would be useful to be able to search for intervals that span some subgroup > of a set of iterators, allowing us to build a 'some of ' or 'at least' > operator - ie, search for terms that appear near at least 3 of a list of 5 > terms. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (LUCENE-8622) Add a MinimumShouldMatch interval iterator
[ https://issues.apache.org/jira/browse/LUCENE-8622?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726858#comment-16726858 ] Alan Woodward commented on LUCENE-8622: --- Attached is a patch containing a MinimumShouldMatchIntervalIterator, currently available via {{Intervals.atLeast}}. As well as being useful for these type of queries, I think this would also be handy for building a proximity boosting query, by iterating over subspans of a set of terms as a sort of very crude query segmentation. > Add a MinimumShouldMatch interval iterator > -- > > Key: LUCENE-8622 > URL: https://issues.apache.org/jira/browse/LUCENE-8622 > Project: Lucene - Core > Issue Type: Task >Reporter: Alan Woodward >Assignee: Alan Woodward >Priority: Major > Attachments: LUCENE-8622.patch > > > It would be useful to be able to search for intervals that span some subgroup > of a set of iterators, allowing us to build a 'some of ' or 'at least' > operator - ie, search for terms that appear near at least 3 of a list of 5 > terms. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Created] (LUCENE-8622) Add a MinimumShouldMatch interval iterator
Alan Woodward created LUCENE-8622: - Summary: Add a MinimumShouldMatch interval iterator Key: LUCENE-8622 URL: https://issues.apache.org/jira/browse/LUCENE-8622 Project: Lucene - Core Issue Type: Task Reporter: Alan Woodward Assignee: Alan Woodward Attachments: LUCENE-8622.patch It would be useful to be able to search for intervals that span some subgroup of a set of iterators, allowing us to build a 'some of ' or 'at least' operator - ie, search for terms that appear near at least 3 of a list of 5 terms. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (LUCENE-8585) Create jump-tables for DocValues at index-time
[ https://issues.apache.org/jira/browse/LUCENE-8585?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726857#comment-16726857 ] Adrien Grand commented on LUCENE-8585: -- I'm fine either way. Even if we don't add it today, it will be super easy to add in the future by checking the version when reading the meta to use a default value for indices that had been created before this was made configurable. > Create jump-tables for DocValues at index-time > -- > > Key: LUCENE-8585 > URL: https://issues.apache.org/jira/browse/LUCENE-8585 > Project: Lucene - Core > Issue Type: Improvement > Components: core/codecs >Affects Versions: master (8.0) >Reporter: Toke Eskildsen >Priority: Minor > Labels: performance > Attachments: LUCENE-8585.patch, LUCENE-8585.patch, > make_patch_lucene8585.sh > > Time Spent: 5h 10m > Remaining Estimate: 0h > > As noted in LUCENE-7589, lookup of DocValues should use jump-tables to avoid > long iterative walks. This is implemented in LUCENE-8374 at search-time > (first request for DocValues from a field in a segment), with the benefit of > working without changes to existing Lucene 7 indexes and the downside of > introducing a startup time penalty and a memory overhead. > As discussed in LUCENE-8374, the codec should be updated to create these > jump-tables at index time. This eliminates the segment-open time & memory > penalties, with the potential downside of increasing index-time for DocValues. > The three elements of LUCENE-8374 should be transferable to index-time > without much alteration of the core structures: > * {{IndexedDISI}} block offset and index skips: A {{long}} (64 bits) for > every 65536 documents, containing the offset of the block in 33 bits and the > index (number of set bits) up to the block in 31 bits. > It can be build sequentially and should be stored as a simple sequence of > consecutive longs for caching of lookups. 
> As it is fairly small, relative to document count, it might be better to > simply memory cache it? > * {{IndexedDISI}} DENSE (> 4095, < 65536 set bits) blocks: A {{short}} (16 > bits) for every 8 {{longs}} (512 bits) for a total of 256 bytes/DENSE_block. > Each {{short}} represents the number of set bits up to right before the > corresponding sub-block of 512 docIDs. > The \{{shorts}} can be computed sequentially or when the DENSE block is > flushed (probably the easiest). They should be stored as a simple sequence of > consecutive shorts for caching of lookups, one logically independent sequence > for each DENSE block. The logical position would be one sequence at the start > of every DENSE block. > Whether it is best to read all the 16 {{shorts}} up front when a DENSE block > is accessed or whether it is best to only read any individual {{short}} when > needed is not clear at this point. > * Variable Bits Per Value: A {{long}} (64 bits) for every 16384 numeric > values. Each {{long}} holds the offset to the corresponding block of values. > The offsets can be computed sequentially and should be stored as a simple > sequence of consecutive {{longs}} for caching of lookups. > The vBPV-offsets has the largest space overhead og the 3 jump-tables and a > lot of the 64 bits in each long are not used for most indexes. They could be > represented as a simple {{PackedInts}} sequence or {{MonotonicLongValues}}, > with the downsides of a potential lookup-time overhead and the need for doing > the compression after all offsets has been determined. > I have no experience with the codec-parts responsible for creating > index-structures. I'm quite willing to take a stab at this, although I > probably won't do much about it before January 2019. Should anyone else wish > to adopt this JIRA-issue or co-work on it, I'll be happy to share. 
-- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Created] (LUCENE-8621) Move LatLonShape out of sandbox
Adrien Grand created LUCENE-8621: Summary: Move LatLonShape out of sandbox Key: LUCENE-8621 URL: https://issues.apache.org/jira/browse/LUCENE-8621 Project: Lucene - Core Issue Type: Improvement Reporter: Adrien Grand LatLonShape has matured a lot over the last months, I'd like to start thinking about moving it out of sandbox so that it doesn't stay there for too long like what happened to LatLonPoint. I am pretty happy with the current encoding. To my knowledge, we might just need to do a minor modification because of LUCENE-8620. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[JENKINS] Lucene-Solr-master-MacOSX (64bit/jdk-9) - Build # 4980 - Failure!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-master-MacOSX/4980/ Java: 64bit/jdk-9 -XX:-UseCompressedOops -XX:+UseG1GC All tests passed Build Log: [...truncated 15065 lines...] [junit4] JVM J1: stdout was not empty, see: /Users/jenkins/workspace/Lucene-Solr-master-MacOSX/solr/build/solr-core/test/temp/junit4-J1-20181221_144240_97510838144662412500121.sysout [junit4] >>> JVM J1 emitted unexpected output (verbatim) [junit4] # [junit4] # A fatal error has been detected by the Java Runtime Environment: [junit4] # [junit4] # SIGSEGV (0xb) at pc=0x00010f0a49d8, pid=61009, tid=20995 [junit4] # [junit4] # JRE version: Java(TM) SE Runtime Environment (9.0+181) (build 9+181) [junit4] # Java VM: Java HotSpot(TM) 64-Bit Server VM (9+181, mixed mode, tiered, g1 gc, bsd-amd64) [junit4] # Problematic frame: [junit4] # V [libjvm.dylib+0x4f49d8] [junit4] # [junit4] # No core dump will be written. Core dumps have been disabled. To enable core dumping, try "ulimit -c unlimited" before starting Java again [junit4] # [junit4] # An error report file with more information is saved as: [junit4] # /Users/jenkins/workspace/Lucene-Solr-master-MacOSX/solr/build/solr-core/test/J1/hs_err_pid61009.log [junit4] # [junit4] # Compiler replay data is saved as: [junit4] # /Users/jenkins/workspace/Lucene-Solr-master-MacOSX/solr/build/solr-core/test/J1/replay_pid61009.log [junit4] # [junit4] # If you would like to submit a bug report, please visit: [junit4] # http://bugreport.java.com/bugreport/crash.jsp [junit4] # [junit4] <<< JVM J1: EOF [...truncated 580 lines...] 
[junit4] ERROR: JVM J1 ended with an exception, command line: /Library/Java/JavaVirtualMachines/jdk-9.jdk/Contents/Home/bin/java -XX:-UseCompressedOops -XX:+UseG1GC -XX:+HeapDumpOnOutOfMemoryError -XX:HeapDumpPath=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX/heapdumps -ea -esa --illegal-access=deny -Dtests.prefix=tests -Dtests.seed=A7551C48D91B7635 -Xmx512M -Dtests.iters= -Dtests.verbose=false -Dtests.infostream=false -Dtests.codec=random -Dtests.postingsformat=random -Dtests.docvaluesformat=random -Dtests.locale=random -Dtests.timezone=random -Dtests.directory=random -Dtests.linedocsfile=europarl.lines.txt.gz -Dtests.luceneMatchVersion=8.0.0 -Dtests.cleanthreads=perClass -Djava.util.logging.config.file=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX/lucene/tools/junit4/logging.properties -Dtests.nightly=false -Dtests.weekly=false -Dtests.monster=false -Dtests.slow=true -Dtests.asserts=true -Dtests.multiplier=1 -DtempDir=./temp -Djava.io.tmpdir=./temp -Dcommon.dir=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX/lucene -Dclover.db.dir=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX/lucene/build/clover/db -Djava.security.policy=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX/lucene/tools/junit4/solr-tests.policy -Dtests.LUCENE_VERSION=8.0.0 -Djetty.testMode=1 -Djetty.insecurerandom=1 -Dsolr.directoryFactory=org.apache.solr.core.MockDirectoryFactory -Djava.awt.headless=true -Djdk.map.althashing.threshold=0 -Dtests.src.home=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX -Djava.security.egd=file:/dev/./urandom -Djunit4.childvm.cwd=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX/solr/build/solr-core/test/J1 -Djunit4.tempDir=/Users/jenkins/workspace/Lucene-Solr-master-MacOSX/solr/build/solr-core/test/temp -Djunit4.childvm.id=1 -Djunit4.childvm.count=2 -Dfile.encoding=US-ASCII -Dtests.disableHdfs=true -Djava.security.manager=org.apache.lucene.util.TestSecurityManager -Dtests.filterstacks=true -Dtests.leaveTemporary=false -Dtests.badapples=false 
-classpath
[jira] [Comment Edited] (LUCENE-8585) Create jump-tables for DocValues at index-time
[ https://issues.apache.org/jira/browse/LUCENE-8585?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726684#comment-16726684 ] Toke Eskildsen edited comment on LUCENE-8585 at 12/21/18 3:35 PM: -- [~jpountz] A colleague suggested that we make the {{DENSE}} rank span adjustable or at least plan for it. Right now it is fixed as an entry for every 8 longs and as we discussed earlier, that is just a qualified guess as to what works best. If we record the span in {{meta}} (only accepting powers of 2 for clean shifts & masks) and hardcode it to 8 in the consumer for now, it is just a single {{short}} spend to allow for future tweaking, without having to change the underlying codec. _Addition:_ Thinking about it, I seem to be falling into the usual trap of adding features and not shipping. If the suggestion above will put the 8.0-target at risk, then let's just postpone it. was (Author: toke): [~jpountz] A colleague suggested that we make the {{DENSE}} rank span adjustable or at least plan for it. Right now it is fixed as an entry for every 8 longs and as we discussed earlier, that is just a qualified guess as to what works best. If we record the span in {{meta}} (only accepting powers of 2 for clean shifts & masks) and hardcode it to 8 in the consumer for now, it is just a single {{short}} spend to allow for future tweaking, without having to change the underlying codec. > Create jump-tables for DocValues at index-time > -- > > Key: LUCENE-8585 > URL: https://issues.apache.org/jira/browse/LUCENE-8585 > Project: Lucene - Core > Issue Type: Improvement > Components: core/codecs >Affects Versions: master (8.0) >Reporter: Toke Eskildsen >Priority: Minor > Labels: performance > Attachments: LUCENE-8585.patch, LUCENE-8585.patch, > make_patch_lucene8585.sh > > Time Spent: 5h 10m > Remaining Estimate: 0h > > As noted in LUCENE-7589, lookup of DocValues should use jump-tables to avoid > long iterative walks. 
This is implemented in LUCENE-8374 at search-time > (first request for DocValues from a field in a segment), with the benefit of > working without changes to existing Lucene 7 indexes and the downside of > introducing a startup time penalty and a memory overhead. > As discussed in LUCENE-8374, the codec should be updated to create these > jump-tables at index time. This eliminates the segment-open time & memory > penalties, with the potential downside of increasing index-time for DocValues. > The three elements of LUCENE-8374 should be transferable to index-time > without much alteration of the core structures: > * {{IndexedDISI}} block offset and index skips: A {{long}} (64 bits) for > every 65536 documents, containing the offset of the block in 33 bits and the > index (number of set bits) up to the block in 31 bits. > It can be built sequentially and should be stored as a simple sequence of > consecutive longs for caching of lookups. > As it is fairly small, relative to document count, it might be better to > simply memory cache it? > * {{IndexedDISI}} DENSE (> 4095, < 65536 set bits) blocks: A {{short}} (16 > bits) for every 8 {{longs}} (512 bits) for a total of 256 bytes/DENSE_block. > Each {{short}} represents the number of set bits up to right before the > corresponding sub-block of 512 docIDs. > The {{shorts}} can be computed sequentially or when the DENSE block is > flushed (probably the easiest). They should be stored as a simple sequence of > consecutive shorts for caching of lookups, one logically independent sequence > for each DENSE block. The logical position would be one sequence at the start > of every DENSE block. > Whether it is best to read all the 16 {{shorts}} up front when a DENSE block > is accessed or whether it is best to only read any individual {{short}} when > needed is not clear at this point. > * Variable Bits Per Value: A {{long}} (64 bits) for every 16384 numeric > values. 
Each {{long}} holds the offset to the corresponding block of values. > The offsets can be computed sequentially and should be stored as a simple > sequence of consecutive {{longs}} for caching of lookups. > The vBPV-offsets have the largest space overhead of the 3 jump-tables and a > lot of the 64 bits in each long are not used for most indexes. They could be > represented as a simple {{PackedInts}} sequence or {{MonotonicLongValues}}, > with the downsides of a potential lookup-time overhead and the need for doing > the compression after all offsets have been determined. > I have no experience with the codec-parts responsible for creating > index-structures. I'm quite willing to take a stab at this, although I > probably won't do much about it before January 2019. Should anyone else wish > to adopt
[jira] [Commented] (SOLR-12121) JWT Authentication plugin
[ https://issues.apache.org/jira/browse/SOLR-12121?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726839#comment-16726839 ] Jan Høydahl commented on SOLR-12121: New push: * Rearrange tests a bit * Added tests for exceptional cases such as invalid JWK token, cannot validate signature * Support HTTP2 * Fix metrics counting * Update docs This is getting closer to committable state. Precommit passes. Need some beasting first :) > JWT Authentication plugin > - > > Key: SOLR-12121 > URL: https://issues.apache.org/jira/browse/SOLR-12121 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) > Components: Authentication >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Fix For: master (8.0) > > Attachments: image-2018-08-27-13-04-04-183.png > > Time Spent: 1h > Remaining Estimate: 0h > > A new Authentication plugin that will accept a [Json Web > Token|https://en.wikipedia.org/wiki/JSON_Web_Token] (JWT) in the > Authorization header and validate it by checking the cryptographic signature. > The plugin will not perform the authentication itself but assert that the > user was authenticated by the service that issued the JWT token. > JWT defined a number of standard claims, and user principal can be fetched > from the {{sub}} (subject) claim and passed on to Solr. The plugin will > always check the {{exp}} (expiry) claim and optionally enforce checks on the > {{iss}} (issuer) and {{aud}} (audience) claims. > The first version of the plugin will only support RSA signing keys and will > support fetching the public key of the issuer through a [Json Web > Key|https://tools.ietf.org/html/rfc7517] (JWK) file, either from a https URL > or from local file. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Created] (LUCENE-8620) Add CONTAINS support for LatLonShape
Ignacio Vera created LUCENE-8620: Summary: Add CONTAINS support for LatLonShape Key: LUCENE-8620 URL: https://issues.apache.org/jira/browse/LUCENE-8620 Project: Lucene - Core Issue Type: Improvement Components: modules/sandbox Reporter: Ignacio Vera Currently the only spatial operation that cannot be performed using {{LatLonShape}} is CONTAINS. This issue will add such capability by tracking if an edge of a generated triangle from the {{Tessellator}} is an edge of the polygon. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-13088: -- Description: The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) can already execute Streaming Expressions and therefore Math Expressions. The *zplot* function will export the results of Solr Math Expressions in a format the Solr Zeppelin interpreter can work with. This will allow Math Expressions to be plotted by *Apache Zeppelin.* Sample syntax: {code:java} let(a=array(1,2,3), b=array(4,5,6), zplot(line1=a, line2=b, linec=array(7,8,9))){code} was: The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) can already execute Streaming Expressions and therefore Math Expressions. The *zplot* function will export the results of Solr Math Expressions in a format the Solr Zeppelin interpreter can work with. This will allow Math Expressions to be plotted by *Apache Zeppelin.* Sample syntax: {code:java} let(a=array(1,2,3), b=array(4,5,6), zplot(line1=a,line2=b,linec=array(7,8,9))){code} > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. 
This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b, linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-13088: -- Description: The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) can already execute Streaming Expressions and therefore Math Expressions. The *zplot* function will export the results of Solr Math Expressions in a format the Solr Zeppelin interpreter can work with. This will allow Math Expressions to be plotted by *Apache Zeppelin.* Sample syntax: {code:java} let(a=array(1,2,3), b=array(4,5,6), zplot(line1=a, line2=b,line=array(7,8,9))){code} was:The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) can already execute Streaming Expressions and therefore Math Expressions. The *zplot* function will export the results of Solr Math Expressions in a format the Solr Zeppelin interpreter can work with. This will allow Math Expressions to be plotted by *Apache Zeppelin.* > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. 
This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a, line2=b,line=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-13088: -- Description: The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) can already execute Streaming Expressions and therefore Math Expressions. The *zplot* function will export the results of Solr Math Expressions in a format the Solr Zeppelin interpreter can work with. This will allow Math Expressions to be plotted by *Apache Zeppelin.* Sample syntax: {code:java} let(a=array(1,2,3), b=array(4,5,6), zplot(line1=a,line2=b,linec=array(7,8,9))){code} was: The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) can already execute Streaming Expressions and therefore Math Expressions. The *zplot* function will export the results of Solr Math Expressions in a format the Solr Zeppelin interpreter can work with. This will allow Math Expressions to be plotted by *Apache Zeppelin.* Sample syntax: {code:java} let(a=array(1,2,3), b=array(4,5,6), zplot(line1=a, line2=b,line=array(7,8,9))){code} > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. 
This will allow Math > Expressions to be plotted by *Apache Zeppelin.* > Sample syntax: > {code:java} > let(a=array(1,2,3), > b=array(4,5,6), > zplot(line1=a,line2=b,linec=array(7,8,9))){code} -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13088) Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin
[ https://issues.apache.org/jira/browse/SOLR-13088?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Joel Bernstein updated SOLR-13088: -- Attachment: SOLR-13088.patch > Add zplot Stream Evaluator to plot math expressions in Apache Zeppelin > -- > > Key: SOLR-13088 > URL: https://issues.apache.org/jira/browse/SOLR-13088 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) >Reporter: Joel Bernstein >Assignee: Joel Bernstein >Priority: Major > Attachments: SOLR-13088.patch > > > The Solr Zeppelin interpreter ([https://github.com/lucidworks/zeppelin-solr]) > can already execute Streaming Expressions and therefore Math Expressions. > The *zplot* function will export the results of Solr Math Expressions in a > format the Solr Zeppelin interpreter can work with. This will allow Math > Expressions to be plotted by *Apache Zeppelin.* -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Created] (LUCENE-8619) Decrease I/O pressure of OfflineSorter
Adrien Grand created LUCENE-8619: Summary: Decrease I/O pressure of OfflineSorter Key: LUCENE-8619 URL: https://issues.apache.org/jira/browse/LUCENE-8619 Project: Lucene - Core Issue Type: Improvement Reporter: Adrien Grand OfflineSorter is likely I/O bound, yet it doesn't really try to relieve I/O. For instance it always writes the length on 2 bytes, which is wasteful when used by BKDWriter since all byte[] arrays have exactly the same length. For LatLonPoint, this is a 25% space overhead that we could remove. Doing lightweight compression on the fly might also help. As a data point, Ignacio told me that after indexing 60M shapes with LatLonShape (1.65B triangles), the index directory was about 265GB and dropped to 57GB when merging was over. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Resolved] (LUCENE-8600) DocValuesFieldUpdates should use a better sort
[ https://issues.apache.org/jira/browse/LUCENE-8600?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Adrien Grand resolved LUCENE-8600. -- Resolution: Fixed Fix Version/s: 7.7 master (8.0) > DocValuesFieldUpdates should use a better sort > -- > > Key: LUCENE-8600 > URL: https://issues.apache.org/jira/browse/LUCENE-8600 > Project: Lucene - Core > Issue Type: Improvement >Reporter: Adrien Grand >Priority: Minor > Fix For: master (8.0), 7.7 > > Attachments: LUCENE-8600.patch > > > This is a follow-up to LUCENE-8598: Simon identified that swaps are a > bottleneck to applying doc-value updates, in particular due to the overhead > of packed ints. It turns out that InPlaceMergeSorter does LOTS of swaps in > order to perform in-place. Replacing with a more efficient sort should help. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #343: SOLR-12121: JWT Token authentication plugin
Github user janhoy commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/343#discussion_r243600574 --- Diff: solr/core/src/test/org/apache/solr/security/JWTAuthPluginIntegrationTest.java --- @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.security; + +import java.io.BufferedReader; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.io.OutputStream; +import java.net.HttpURLConnection; +import java.net.URL; +import java.nio.charset.StandardCharsets; +import java.util.concurrent.TimeoutException; +import java.util.concurrent.atomic.AtomicInteger; +import java.util.stream.Collectors; + +import org.apache.http.Header; +import org.apache.http.HttpException; +import org.apache.http.HttpHeaders; +import org.apache.http.HttpRequest; +import org.apache.http.HttpRequestInterceptor; +import org.apache.http.entity.ContentType; +import org.apache.http.protocol.HttpContext; +import org.apache.solr.client.solrj.impl.HttpClientUtil; +import org.apache.solr.cloud.SolrCloudAuthTestCase; +import org.apache.solr.common.util.Pair; +import org.jose4j.jwk.PublicJsonWebKey; +import org.jose4j.jwk.RsaJsonWebKey; +import org.jose4j.jws.AlgorithmIdentifiers; +import org.jose4j.jws.JsonWebSignature; +import org.jose4j.jwt.JwtClaims; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; + +/** + * Validate that JWT token authentication works in a real cluster. + * TODO: Test also using SolrJ as client. 
But that requires a way to set Authorization header on request, see SOLR-13070 + */ +public class JWTAuthPluginIntegrationTest extends SolrCloudAuthTestCase { + protected static final int NUM_SERVERS = 2; + protected static final int NUM_SHARDS = 2; + protected static final int REPLICATION_FACTOR = 1; + private static final String COLLECTION = "jwtColl"; + private static String jwtTestToken; + private static String baseUrl; + private static AtomicInteger jwtInterceptCount = new AtomicInteger(); + private static AtomicInteger pkiInterceptCount = new AtomicInteger(); + private static final CountInterceptor interceptor = new CountInterceptor(); + + @BeforeClass + public static void setupClass() throws Exception { +configureCluster(NUM_SERVERS)// nodes + .withSecurityJson(TEST_PATH().resolve("security").resolve("jwt_plugin_jwk_security.json")) +.addConfig("conf1", TEST_PATH().resolve("configsets").resolve("cloud-minimal").resolve("conf")) +.configure(); +baseUrl = cluster.getRandomJetty(random()).getBaseUrl().toString(); + +String jwkJSON = "{\n" + +" \"kty\": \"RSA\",\n" + +" \"d\": \"i6pyv2z3o-MlYytWsOr3IE1olu2RXZBzjPRBNgWAP1TlLNaphHEvH5aHhe_CtBAastgFFMuP29CFhaL3_tGczkvWJkSveZQN2AHWHgRShKgoSVMspkhOt3Ghha4CvpnZ9BnQzVHnaBnHDTTTfVgXz7P1ZNBhQY4URG61DKIF-JSSClyh1xKuMoJX0lILXDYGGcjVTZL_hci4IXPPTpOJHV51-pxuO7WU5M9252UYoiYyCJ56ai8N49aKIMsqhdGuO4aWUwsGIW4oQpjtce5eEojCprYl-9rDhTwLAFoBtjy6LvkqlR2Ae5dKZYpStljBjK8PJrBvWZjXAEMDdQ8PuQ\",\n" + +" \"e\": \"AQAB\",\n" + +" \"use\": \"sig\",\n" + +" \"kid\": \"test\",\n" + +" \"alg\": \"RS256\",\n" + +" \"n\": \"jeyrvOaZrmKWjyNXt0myAc_pJ1hNt3aRupExJEx1ewPaL9J9HFgSCjMrYxCB1ETO1NDyZ3nSgjZis-jHHDqBxBjRdq_t1E2rkGFaYbxAyKt220Pwgme_SFTB9MXVrFQGkKyjmQeVmOmV6zM3KK8uMdKQJ4aoKmwBcF5Zg7EZdDcKOFgpgva1Jq-FlEsaJ2xrYDYo3KnGcOHIt9_0NQeLsqZbeWYLxYni7uROFncXYV5FhSJCeR4A_rrbwlaCydGxE0ToC_9HNYibUHlkJjqyUhAgORCbNS8JLCJH8NUi5sDdIawK9GTSyvsJXZ-QHqo4cMUuxWV5AJtaRGghuMUfqQ\"\n" + +"}"; + +PublicJsonWebKey jwk = RsaJsonWebKey.Factory.newPublicJwk(jwkJSON); 
+JwtClaims claims = JWTAuthPluginTest.generateClaims(); +JsonWebSignature jws = new JsonWebSignature(); +jws.setPayload(claims.toJson()); +
[jira] [Created] (SOLR-13090) Make maxBooleanClauses support system-property override
Jason Gerlowski created SOLR-13090: -- Summary: Make maxBooleanClauses support system-property override Key: SOLR-13090 URL: https://issues.apache.org/jira/browse/SOLR-13090 Project: Solr Issue Type: Improvement Security Level: Public (Default Security Level. Issues are Public) Affects Versions: master (8.0), 7.7 Reporter: Jason Gerlowski Currently, the {{maxBooleanClauses}} property is specified in most solrconfig's as the hardcoded value "1024". It'd be nice if we changed our shipped configs so that they instead specified it as {{${solr.max.booleanClauses:1024}
[JENKINS] Lucene-Solr-Tests-7.x - Build # 1145 - Still Unstable
Build: https://builds.apache.org/job/Lucene-Solr-Tests-7.x/1145/ 1 tests failed. FAILED: org.apache.solr.analytics.legacy.facet.LegacyFieldFacetCloudTest.multiValueTest Error Message: Error from server at http://127.0.0.1:45196/solr: KeeperErrorCode = Session expired for /configs/conf Stack Trace: org.apache.solr.client.solrj.impl.HttpSolrClient$RemoteSolrException: Error from server at http://127.0.0.1:45196/solr: KeeperErrorCode = Session expired for /configs/conf at __randomizedtesting.SeedInfo.seed([F25455AA6382DB49:324579012DAC0139]:0) at org.apache.solr.client.solrj.impl.HttpSolrClient.executeMethod(HttpSolrClient.java:643) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:255) at org.apache.solr.client.solrj.impl.HttpSolrClient.request(HttpSolrClient.java:244) at org.apache.solr.client.solrj.impl.LBHttpSolrClient.doRequest(LBHttpSolrClient.java:484) at org.apache.solr.client.solrj.impl.LBHttpSolrClient.request(LBHttpSolrClient.java:414) at org.apache.solr.client.solrj.impl.CloudSolrClient.sendRequest(CloudSolrClient.java:1110) at org.apache.solr.client.solrj.impl.CloudSolrClient.requestWithRetryOnStaleState(CloudSolrClient.java:884) at org.apache.solr.client.solrj.impl.CloudSolrClient.request(CloudSolrClient.java:817) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:194) at org.apache.solr.client.solrj.SolrRequest.process(SolrRequest.java:211) at org.apache.solr.analytics.legacy.LegacyAbstractAnalyticsCloudTest.setupCollection(LegacyAbstractAnalyticsCloudTest.java:51) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at 
com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:972) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at 
org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:41) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at
[JENKINS] Lucene-Solr-NightlyTests-7.x - Build # 410 - Still unstable
Build: https://builds.apache.org/job/Lucene-Solr-NightlyTests-7.x/410/ 4 tests failed. FAILED: org.apache.solr.cloud.RestartWhileUpdatingTest.test Error Message: There are still nodes recoverying - waited for 320 seconds Stack Trace: java.lang.AssertionError: There are still nodes recoverying - waited for 320 seconds at __randomizedtesting.SeedInfo.seed([2DC0C97CCCF325AB:A594F6A6620F4853]:0) at org.junit.Assert.fail(Assert.java:88) at org.apache.solr.cloud.AbstractDistribZkTestBase.waitForRecoveriesToFinish(AbstractDistribZkTestBase.java:195) at org.apache.solr.cloud.AbstractFullDistribZkTestBase.waitForRecoveriesToFinish(AbstractFullDistribZkTestBase.java:1014) at org.apache.solr.cloud.AbstractFullDistribZkTestBase.waitForThingsToLevelOut(AbstractFullDistribZkTestBase.java:1571) at org.apache.solr.cloud.RestartWhileUpdatingTest.test(RestartWhileUpdatingTest.java:143) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsFixedStatement.callStatement(BaseDistributedSearchTestCase.java:1063) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsStatement.evaluate(BaseDistributedSearchTestCase.java:1035) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at 
org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:41) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at 
com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at
[jira] [Commented] (LUCENE-8618) MMapDirectory's read ahead on random-access files might trash the OS cache
[ https://issues.apache.org/jira/browse/LUCENE-8618?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726773#comment-16726773 ] Uwe Schindler commented on LUCENE-8618: --- IMHO, this 2 MB read ahead is operating system specific and may be changed based on the sysctl settings. As there is the easy possible way to use FileSwitchDirectory for those specific update-use-cases, I don't think that's something we need to change in Lucene's lower level, that's more of an operating system config. This was always an option to use FileSwitchDirectory, if you know the file extensions. Maybe we should have some getter on all codecs, so you can quickly get the file extensions used by the codec as a Set, so you can easily build your fileswitch directory. If we can access madvise/fadvise at some point, we can use IOContext for this, but that's not possible yet. > MMapDirectory's read ahead on random-access files might trash the OS cache > -- > > Key: LUCENE-8618 > URL: https://issues.apache.org/jira/browse/LUCENE-8618 > Project: Lucene - Core > Issue Type: Improvement >Reporter: Adrien Grand >Priority: Minor > > At Elastic we were reported a case which runs significantly slower with > MMapDirectory than with NIOFSDirectory. After a long analysis, we discovered > that it had to do with MMapDirectory's read ahead of 2MB, which doesn't help > and even trashes the OS cache on stored fields and term vectors files which > have a fully random access pattern (except at merge time). > The particular use-case that exhibits the slow-down is performing updates, > ie. we first look up a document based on its id, fetch stored fields, compute > new stored fields (eg. after adding or changing the value of a field) and add > the document back to the index. We were able to reproduce the workload that > this Elasticsearch user described and measured a median throughput of 3600 > updates/s with MMapDirectory and 5000 updates/s with NIOFSDirectory. 
It even > goes up to 5600 updates/s if you configure a FileSwitchDirectory to use > MMapDirectory for the terms dictionary and NIOFSDirectory for stored fields > (postings files are not relevant here since postings are inlined in the terms > dict when docFreq=1 and indexOptions=DOCS). > While it is possible to work around this issue on top of Lucene, maybe this > is something that we could improve directly in Lucene, eg. by propagating > information about the expected access pattern and avoiding mmap on files that > have a fully random access pattern (until Java exposes madvise in some way)? -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[JENKINS] Lucene-Solr-7.6-Windows (32bit/jdk1.8.0_172) - Build # 28 - Unstable!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-7.6-Windows/28/ Java: 32bit/jdk1.8.0_172 -client -XX:+UseSerialGC 10 tests failed. FAILED: org.apache.solr.cloud.DeleteReplicaTest.raceConditionOnDeleteAndRegisterReplica Error Message: Expected new active leader null Live Nodes: [127.0.0.1:57611_solr, 127.0.0.1:57614_solr, 127.0.0.1:57616_solr] Last available state: DocCollection(raceDeleteReplica_false//collections/raceDeleteReplica_false/state.json/13)={ "pullReplicas":"0", "replicationFactor":"2", "shards":{"shard1":{ "range":"8000-7fff", "state":"active", "replicas":{ "core_node3":{ "core":"raceDeleteReplica_false_shard1_replica_n1", "base_url":"http://127.0.0.1:57626/solr", "node_name":"127.0.0.1:57626_solr", "state":"down", "type":"NRT", "force_set_state":"false", "leader":"true"}, "core_node6":{ "core":"raceDeleteReplica_false_shard1_replica_n5", "base_url":"http://127.0.0.1:57626/solr", "node_name":"127.0.0.1:57626_solr", "state":"down", "type":"NRT", "force_set_state":"false", "router":{"name":"compositeId"}, "maxShardsPerNode":"1", "autoAddReplicas":"false", "nrtReplicas":"2", "tlogReplicas":"0"} Stack Trace: java.lang.AssertionError: Expected new active leader null Live Nodes: [127.0.0.1:57611_solr, 127.0.0.1:57614_solr, 127.0.0.1:57616_solr] Last available state: DocCollection(raceDeleteReplica_false//collections/raceDeleteReplica_false/state.json/13)={ "pullReplicas":"0", "replicationFactor":"2", "shards":{"shard1":{ "range":"8000-7fff", "state":"active", "replicas":{ "core_node3":{ "core":"raceDeleteReplica_false_shard1_replica_n1", "base_url":"http://127.0.0.1:57626/solr", "node_name":"127.0.0.1:57626_solr", "state":"down", "type":"NRT", "force_set_state":"false", "leader":"true"}, "core_node6":{ "core":"raceDeleteReplica_false_shard1_replica_n5", "base_url":"http://127.0.0.1:57626/solr", "node_name":"127.0.0.1:57626_solr", "state":"down", "type":"NRT", "force_set_state":"false", "router":{"name":"compositeId"}, "maxShardsPerNode":"1", 
"autoAddReplicas":"false", "nrtReplicas":"2", "tlogReplicas":"0"} at __randomizedtesting.SeedInfo.seed([CAA2BAD302AB6BC1:A0B4DB036A59210B]:0) at org.junit.Assert.fail(Assert.java:93) at org.apache.solr.cloud.SolrCloudTestCase.waitForState(SolrCloudTestCase.java:280) at org.apache.solr.cloud.DeleteReplicaTest.raceConditionOnDeleteAndRegisterReplica(DeleteReplicaTest.java:334) at org.apache.solr.cloud.DeleteReplicaTest.raceConditionOnDeleteAndRegisterReplica(DeleteReplicaTest.java:230) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1742) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:935) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:971) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:985) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at 
com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:944) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:830) at
[jira] [Commented] (LUCENE-8618) MMapDirectory's read ahead on random-access files might trash the OS cache
[ https://issues.apache.org/jira/browse/LUCENE-8618?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726761#comment-16726761 ] Robert Muir commented on LUCENE-8618: - {quote} we first look up a document based on its id, fetch stored fields, compute new stored fields (eg. after adding or changing the value of a field) and add the document back to the index. {quote} I don't think we should make things complicated to optimize for this. > MMapDirectory's read ahead on random-access files might trash the OS cache > -- > > Key: LUCENE-8618 > URL: https://issues.apache.org/jira/browse/LUCENE-8618 > Project: Lucene - Core > Issue Type: Improvement >Reporter: Adrien Grand >Priority: Minor > > At Elastic we were reported a case which runs significantly slower with > MMapDirectory than with NIOFSDirectory. After a long analysis, we discovered > that it had to do with MMapDirectory's read ahead of 2MB, which doesn't help > and even trashes the OS cache on stored fields and term vectors files which > have a fully random access pattern (except at merge time). > The particular use-case that exhibits the slow-down is performing updates, > ie. we first look up a document based on its id, fetch stored fields, compute > new stored fields (eg. after adding or changing the value of a field) and add > the document back to the index. We were able to reproduce the workload that > this Elasticsearch user described and measured a median throughput of 3600 > updates/s with MMapDirectory and 5000 updates/s with NIOFSDirectory. It even > goes up to 5600 updates/s if you configure a FileSwitchDirectory to use > MMapDirectory for the terms dictionary and NIOFSDirectory for stored fields > (postings files are not relevant here since postings are inlined in the terms > dict when docFreq=1 and indexOptions=DOCS). > While it is possible to work around this issue on top of Lucene, maybe this > is something that we could improve directly in Lucene, eg. 
by propagating > information about the expected access pattern and avoiding mmap on files that > have a fully random access pattern (until Java exposes madvise in some way)? -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Created] (LUCENE-8618) MMapDirectory's read ahead on random-access files might trash the OS cache
Adrien Grand created LUCENE-8618: Summary: MMapDirectory's read ahead on random-access files might trash the OS cache Key: LUCENE-8618 URL: https://issues.apache.org/jira/browse/LUCENE-8618 Project: Lucene - Core Issue Type: Improvement Reporter: Adrien Grand At Elastic we were reported a case which runs significantly slower with MMapDirectory than with NIOFSDirectory. After a long analysis, we discovered that it had to do with MMapDirectory's read ahead of 2MB, which doesn't help and even trashes the OS cache on stored fields and term vectors files which have a fully random access pattern (except at merge time). The particular use-case that exhibits the slow-down is performing updates, ie. we first look up a document based on its id, fetch stored fields, compute new stored fields (eg. after adding or changing the value of a field) and add the document back to the index. We were able to reproduce the workload that this Elasticsearch user described and measured a median throughput of 3600 updates/s with MMapDirectory and 5000 updates/s with NIOFSDirectory. It even goes up to 5600 updates/s if you configure a FileSwitchDirectory to use MMapDirectory for the terms dictionary and NIOFSDirectory for stored fields (postings files are not relevant here since postings are inlined in the terms dict when docFreq=1 and indexOptions=DOCS). While it is possible to work around this issue on top of Lucene, maybe this is something that we could improve directly in Lucene, eg. by propagating information about the expected access pattern and avoiding mmap on files that have a fully random access pattern (until Java exposes madvise in some way)? -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13089) bin/solr's use of lsof has some issues
[ https://issues.apache.org/jira/browse/SOLR-13089?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Martijn Koster updated SOLR-13089: -- Attachment: (was: 0001-SOLR-13089-lsof-fixes.patch) > bin/solr's use of lsof has some issues > -- > > Key: SOLR-13089 > URL: https://issues.apache.org/jira/browse/SOLR-13089 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCLI >Reporter: Martijn Koster >Priority: Minor > > The {{bin/solr}} script uses this {{lsof}} invocation to check if the Solr > port is being listened on: > {noformat} > running=`lsof -PniTCP:$SOLR_PORT -sTCP:LISTEN` > if [ -z "$running" ]; then > {noformat} > code is at > [here|https://github.com/apache/lucene-solr/blob/master/solr/bin/solr#L2147]. > There are a few issues with this. > h2. 1. False negatives when port is occupied by different user > When {{lsof}} runs as non-root, it only shows sockets for processes with your > effective uid. > For example: > {noformat} > $ id -u && nc -l 7788 & > [1] 26576 > 1000 > works: nc ran as my user > $ lsof -PniTCP:7788 -sTCP:LISTEN > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > nc 26580 mak3u IPv4 2818104 0t0 TCP *:7788 (LISTEN) > fails: ssh is running as root > $ lsof -PniTCP:22 -sTCP:LISTEN > works if we are root > $ sudo lsof -PniTCP:22 -sTCP:LISTEN > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > sshd2524 root3u IPv4 18426 0t0 TCP *:22 (LISTEN) > sshd2524 root4u IPv6 18428 0t0 TCP *:22 (LISTEN) > {noformat} > Solr runs as non-root. > So if some other process owned by a different user occupies that port, you > will get a false negative (it will say Solr is not running even though it is) > I can't think of a good way to fix or work around that (short of not using > {{lsof}} in the first place). > Perhaps an uncommon scenario we need not worry too much about. > h2. 2. 
lsof can complain about lack of /etc/password entries > If {{lsof}} runs without the current effective user having an entry in > {{/etc/passwd}}, > it produces a warning on stderr: > {noformat} > $ docker run -d -u 0 solr:7.6.0 bash -c "chown -R /opt/; gosu > solr-foreground" > 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 > $ docker exec -it -u > 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 bash > I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 -sTCP:LISTEN > lsof: no pwd entry for UID > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > lsof: no pwd entry for UID > java 9 115u IPv4 2813503 0t0 TCP *:8983 (LISTEN) > I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 > -sTCP:LISTEN>/dev/null > lsof: no pwd entry for UID > lsof: no pwd entry for UID > {noformat} > You can avoid this by using the {{-t}} tag, which specifies that lsof should > produce terse output with process identifiers only and no header: > {noformat} > I have no name!@4397c3f51d4a:/opt/solr$ lsof -t -PniTCP:8983 -sTCP:LISTEN > 9 > {noformat} > This is a rare circumstance, but one I encountered and worked around. > h2. 3. On Alpine, lsof is implemented by busybox, but with incompatible > arguments > On Alpine, {{busybox}} implements {{lsof}}, but does not support the > arguments, so you get: > {noformat} > $ docker run -it alpine sh > / # lsof -t -PniTCP:8983 -sTCP:LISTEN > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/tty > {noformat} > so if you ran Solr, in the background, and it failed to start, this code > would produce a false positive. > For example: > {noformat} > docker volume create mysol > docker run -v mysol:/mysol bash bash -c "chown 8983:8983 /mysol" > docker run -it -v mysol:/mysol -w /mysol -v > $HOME/Downloads/solr-7.6.0.tgz:/solr-7.6.0.tgz openjdk:8-alpine sh > apk add procps bash > tar xvzf /solr-7.6.0.tgz > chown -R 8983:8983 . 
> {noformat} > then in a separate terminal: > {noformat} > $ docker exec -it -u 8983 serene_saha sh > /mysol $ SOLR_OPTS=--invalid ./solr-7.6.0/bin/solr start > whoami: unknown uid 8983 > Waiting up to 180 seconds to see Solr running on port 8983 [|] > Started Solr server on port 8983 (pid=101). Happy searching! > /mysol $ > {noformat} > and in another separate terminal: > {noformat} > $ docker exec -it thirsty_liskov bash > bash-4.4$ cat server/logs/solr-8983-console.log > Unrecognized option: --invalid > Error: Could not create the Java Virtual Machine. > Error: A fatal exception has occurred. Program will exit. > {noformat} > so it is saying Solr is running, when it isn't. > Now, all this can be avoided by just installing the real {{lsof}} with
[jira] [Updated] (SOLR-13087) bin/solr's use of whoami can produce warnings
[ https://issues.apache.org/jira/browse/SOLR-13087?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Martijn Koster updated SOLR-13087: -- Attachment: 0001-SOLR-13087-Use-EUID-to-check-for-root-in-bin-solr.patch > bin/solr's use of whoami can produce warnings > - > > Key: SOLR-13087 > URL: https://issues.apache.org/jira/browse/SOLR-13087 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCLI >Reporter: Martijn Koster >Priority: Trivial > Attachments: > 0001-SOLR-13087-Use-EUID-to-check-for-root-in-bin-solr.patch > > > The {{bin/solr}} script uses {{whoami}} to determine if it is running as root. > That code is [here > |https://github.com/apache/lucene-solr/blob/master/solr/bin/solr#L1089]and > was introduced with [this commit > |https://github.com/apache/lucene-solr/commit/7561461f738a447856bb93b0a847b0200fff4c9c]for > SOLR-7826 > This can produce a warning if the effective uid is not present in > {{/etc/passwd}}. > For example: > {noformat} > $ docker run -it -u 8 bash bash -c 'whoami' > whoami: unknown uid 8 > {noformat} > This is an unusual situation, but one I encountered and [worked > around|https://github.com/docker-solr/docker-solr/commit/8a4e236e93d6fcda3e33937b332d1911c1e7b8f8#diff-1578307c887a49e90a57418e653ad7f6R49] > The use of {{whoami}} can be avoided by using {{id -u}}, or better yet > {{$UID}} / {{$EUID}} which is built into bash and thus also avoids a subshell > invocation. I'm not aware of any downside to this approach. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13087) bin/solr's use of whoami can produce warnings
[ https://issues.apache.org/jira/browse/SOLR-13087?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Martijn Koster updated SOLR-13087: -- Attachment: (was: 0001-SOLR-13087-Use-EUID-to-check-for-root-in-bin-solr.patch) > bin/solr's use of whoami can produce warnings > - > > Key: SOLR-13087 > URL: https://issues.apache.org/jira/browse/SOLR-13087 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCLI >Reporter: Martijn Koster >Priority: Trivial > Attachments: > 0001-SOLR-13087-Use-EUID-to-check-for-root-in-bin-solr.patch > > > The {{bin/solr}} script uses {{whoami}} to determine if it is running as root. > That code is [here > |https://github.com/apache/lucene-solr/blob/master/solr/bin/solr#L1089]and > was introduced with [this commit > |https://github.com/apache/lucene-solr/commit/7561461f738a447856bb93b0a847b0200fff4c9c]for > SOLR-7826 > This can produce a warning if the effective uid is not present in > {{/etc/passwd}}. > For example: > {noformat} > $ docker run -it -u 8 bash bash -c 'whoami' > whoami: unknown uid 8 > {noformat} > This is an unusual situation, but one I encountered and [worked > around|https://github.com/docker-solr/docker-solr/commit/8a4e236e93d6fcda3e33937b332d1911c1e7b8f8#diff-1578307c887a49e90a57418e653ad7f6R49] > The use of {{whoami}} can be avoided by using {{id -u}}, or better yet > {{$UID}} / {{$EUID}} which is built into bash and thus also avoids a subshell > invocation. I'm not aware of any downside to this approach. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13089) bin/solr's use of lsof has some issues
[ https://issues.apache.org/jira/browse/SOLR-13089?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Martijn Koster updated SOLR-13089: -- Attachment: 0001-SOLR-13089-lsof-fixes.patch > bin/solr's use of lsof has some issues > -- > > Key: SOLR-13089 > URL: https://issues.apache.org/jira/browse/SOLR-13089 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCLI >Reporter: Martijn Koster >Priority: Minor > Attachments: 0001-SOLR-13089-lsof-fixes.patch > > > The {{bin/solr}} script uses this {{lsof}} invocation to check if the Solr > port is being listened on: > {noformat} > running=`lsof -PniTCP:$SOLR_PORT -sTCP:LISTEN` > if [ -z "$running" ]; then > {noformat} > code is at > [here|https://github.com/apache/lucene-solr/blob/master/solr/bin/solr#L2147]. > There are a few issues with this. > h2. 1. False negatives when port is occupied by different user > When {{lsof}} runs as non-root, it only shows sockets for processes with your > effective uid. > For example: > {noformat} > $ id -u && nc -l 7788 & > [1] 26576 > 1000 > works: nc ran as my user > $ lsof -PniTCP:7788 -sTCP:LISTEN > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > nc 26580 mak3u IPv4 2818104 0t0 TCP *:7788 (LISTEN) > fails: ssh is running as root > $ lsof -PniTCP:22 -sTCP:LISTEN > works if we are root > $ sudo lsof -PniTCP:22 -sTCP:LISTEN > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > sshd2524 root3u IPv4 18426 0t0 TCP *:22 (LISTEN) > sshd2524 root4u IPv6 18428 0t0 TCP *:22 (LISTEN) > {noformat} > Solr runs as non-root. > So if some other process owned by a different user occupies that port, you > will get a false negative (it will say Solr is not running even though it is) > I can't think of a good way to fix or work around that (short of not using > {{lsof}} in the first place). > Perhaps an uncommon scenario we need not worry too much about. > h2. 2. 
lsof can complain about lack of /etc/password entries > If {{lsof}} runs without the current effective user having an entry in > {{/etc/passwd}}, > it produces a warning on stderr: > {noformat} > $ docker run -d -u 0 solr:7.6.0 bash -c "chown -R /opt/; gosu > solr-foreground" > 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 > $ docker exec -it -u > 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 bash > I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 -sTCP:LISTEN > lsof: no pwd entry for UID > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > lsof: no pwd entry for UID > java 9 115u IPv4 2813503 0t0 TCP *:8983 (LISTEN) > I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 > -sTCP:LISTEN>/dev/null > lsof: no pwd entry for UID > lsof: no pwd entry for UID > {noformat} > You can avoid this by using the {{-t}} tag, which specifies that lsof should > produce terse output with process identifiers only and no header: > {noformat} > I have no name!@4397c3f51d4a:/opt/solr$ lsof -t -PniTCP:8983 -sTCP:LISTEN > 9 > {noformat} > This is a rare circumstance, but one I encountered and worked around. > h2. 3. On Alpine, lsof is implemented by busybox, but with incompatible > arguments > On Alpine, {{busybox}} implements {{lsof}}, but does not support the > arguments, so you get: > {noformat} > $ docker run -it alpine sh > / # lsof -t -PniTCP:8983 -sTCP:LISTEN > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/tty > {noformat} > so if you ran Solr, in the background, and it failed to start, this code > would produce a false positive. > For example: > {noformat} > docker volume create mysol > docker run -v mysol:/mysol bash bash -c "chown 8983:8983 /mysol" > docker run -it -v mysol:/mysol -w /mysol -v > $HOME/Downloads/solr-7.6.0.tgz:/solr-7.6.0.tgz openjdk:8-alpine sh > apk add procps bash > tar xvzf /solr-7.6.0.tgz > chown -R 8983:8983 . 
> {noformat} > then in a separate terminal: > {noformat} > $ docker exec -it -u 8983 serene_saha sh > /mysol $ SOLR_OPTS=--invalid ./solr-7.6.0/bin/solr start > whoami: unknown uid 8983 > Waiting up to 180 seconds to see Solr running on port 8983 [|] > Started Solr server on port 8983 (pid=101). Happy searching! > /mysol $ > {noformat} > and in another separate terminal: > {noformat} > $ docker exec -it thirsty_liskov bash > bash-4.4$ cat server/logs/solr-8983-console.log > Unrecognized option: --invalid > Error: Could not create the Java Virtual Machine. > Error: A fatal exception has occurred. Program will exit. > {noformat} > so it is saying Solr is running, when it isn't. > Now, all this can be
[jira] [Commented] (SOLR-13045) Harden TestSimPolicyCloud
[ https://issues.apache.org/jira/browse/SOLR-13045?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726730#comment-16726730 ] ASF subversion and git services commented on SOLR-13045: Commit ba514dd7ccd0ed275ac914aa0cb7f866cf874233 in lucene-solr's branch refs/heads/branch_7x from Jason Gerlowski [ https://git-wip-us.apache.org/repos/asf?p=lucene-solr.git;h=ba514dd ] SOLR-13045: Sim node versioning should start at 0 Prior to this commit, new ZK nodes being simulated by the sim framework were started with a version of -1. This causes problems, since -1 is also coincidentally the flag value used to ignore optimistic concurrency locking and force overwrite values. > Harden TestSimPolicyCloud > - > > Key: SOLR-13045 > URL: https://issues.apache.org/jira/browse/SOLR-13045 > Project: Solr > Issue Type: Test > Security Level: Public(Default Security Level. Issues are Public) > Components: AutoScaling >Affects Versions: master (8.0) >Reporter: Jason Gerlowski >Assignee: Jason Gerlowski >Priority: Major > Attachments: SOLR-13045.patch, SOLR-13045.patch, jenkins.log.txt.gz > > > Several tests in TestSimPolicyCloud, but especially > {{testCreateCollectionAddReplica}}, have some flaky behavior, even after > Mark's recent test-fix commit. This JIRA covers looking into and (hopefully) > fixing this test failure. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-13045) Harden TestSimPolicyCloud
[ https://issues.apache.org/jira/browse/SOLR-13045?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726728#comment-16726728 ] ASF subversion and git services commented on SOLR-13045: Commit 207b3f4453e6f4fafe50faee094b96b3c7e4600b in lucene-solr's branch refs/heads/master from Jason Gerlowski [ https://git-wip-us.apache.org/repos/asf?p=lucene-solr.git;h=207b3f4 ] SOLR-13045: Sim node versioning should start at 0 Prior to this commit, new ZK nodes being simulated by the sim framework were started with a version of -1. This causes problems, since -1 is also coincidentally the flag value used to ignore optimistic concurrency locking and force overwrite values. > Harden TestSimPolicyCloud > - > > Key: SOLR-13045 > URL: https://issues.apache.org/jira/browse/SOLR-13045 > Project: Solr > Issue Type: Test > Security Level: Public(Default Security Level. Issues are Public) > Components: AutoScaling >Affects Versions: master (8.0) >Reporter: Jason Gerlowski >Assignee: Jason Gerlowski >Priority: Major > Attachments: SOLR-13045.patch, SOLR-13045.patch, jenkins.log.txt.gz > > > Several tests in TestSimPolicyCloud, but especially > {{testCreateCollectionAddReplica}}, have some flaky behavior, even after > Mark's recent test-fix commit. This JIRA covers looking into and (hopefully) > fixing this test failure. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Updated] (SOLR-13089) bin/solr's use of lsof has some issues
[ https://issues.apache.org/jira/browse/SOLR-13089?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ] Martijn Koster updated SOLR-13089: -- Attachment: 0001-SOLR-13089-lsof-fixes.patch > bin/solr's use of lsof has some issues > -- > > Key: SOLR-13089 > URL: https://issues.apache.org/jira/browse/SOLR-13089 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: SolrCLI >Reporter: Martijn Koster >Priority: Minor > Attachments: 0001-SOLR-13089-lsof-fixes.patch > > > The {{bin/solr}} script uses this {{lsof}} invocation to check if the Solr > port is being listened on: > {noformat} > running=`lsof -PniTCP:$SOLR_PORT -sTCP:LISTEN` > if [ -z "$running" ]; then > {noformat} > code is at > [here|https://github.com/apache/lucene-solr/blob/master/solr/bin/solr#L2147]. > There are a few issues with this. > h2. 1. False negatives when port is occupied by different user > When {{lsof}} runs as non-root, it only shows sockets for processes with your > effective uid. > For example: > {noformat} > $ id -u && nc -l 7788 & > [1] 26576 > 1000 > works: nc ran as my user > $ lsof -PniTCP:7788 -sTCP:LISTEN > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > nc 26580 mak3u IPv4 2818104 0t0 TCP *:7788 (LISTEN) > fails: ssh is running as root > $ lsof -PniTCP:22 -sTCP:LISTEN > works if we are root > $ sudo lsof -PniTCP:22 -sTCP:LISTEN > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > sshd2524 root3u IPv4 18426 0t0 TCP *:22 (LISTEN) > sshd2524 root4u IPv6 18428 0t0 TCP *:22 (LISTEN) > {noformat} > Solr runs as non-root. > So if some other process owned by a different user occupies that port, you > will get a false negative (it will say Solr is not running even though it is) > I can't think of a good way to fix or work around that (short of not using > {{lsof}} in the first place). > Perhaps an uncommon scenario we need not worry too much about. > h2. 2. 
lsof can complain about lack of /etc/password entries > If {{lsof}} runs without the current effective user having an entry in > {{/etc/passwd}}, > it produces a warning on stderr: > {noformat} > $ docker run -d -u 0 solr:7.6.0 bash -c "chown -R /opt/; gosu > solr-foreground" > 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 > $ docker exec -it -u > 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 bash > I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 -sTCP:LISTEN > lsof: no pwd entry for UID > COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME > lsof: no pwd entry for UID > java 9 115u IPv4 2813503 0t0 TCP *:8983 (LISTEN) > I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 > -sTCP:LISTEN>/dev/null > lsof: no pwd entry for UID > lsof: no pwd entry for UID > {noformat} > You can avoid this by using the {{-t}} tag, which specifies that lsof should > produce terse output with process identifiers only and no header: > {noformat} > I have no name!@4397c3f51d4a:/opt/solr$ lsof -t -PniTCP:8983 -sTCP:LISTEN > 9 > {noformat} > This is a rare circumstance, but one I encountered and worked around. > h2. 3. On Alpine, lsof is implemented by busybox, but with incompatible > arguments > On Alpine, {{busybox}} implements {{lsof}}, but does not support the > arguments, so you get: > {noformat} > $ docker run -it alpine sh > / # lsof -t -PniTCP:8983 -sTCP:LISTEN > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/pts/0 > 1 /bin/busybox/dev/tty > {noformat} > so if you ran Solr, in the background, and it failed to start, this code > would produce a false positive. > For example: > {noformat} > docker volume create mysol > docker run -v mysol:/mysol bash bash -c "chown 8983:8983 /mysol" > docker run -it -v mysol:/mysol -w /mysol -v > $HOME/Downloads/solr-7.6.0.tgz:/solr-7.6.0.tgz openjdk:8-alpine sh > apk add procps bash > tar xvzf /solr-7.6.0.tgz > chown -R 8983:8983 . 
> {noformat} > then in a separate terminal: > {noformat} > $ docker exec -it -u 8983 serene_saha sh > /mysol $ SOLR_OPTS=--invalid ./solr-7.6.0/bin/solr start > whoami: unknown uid 8983 > Waiting up to 180 seconds to see Solr running on port 8983 [|] > Started Solr server on port 8983 (pid=101). Happy searching! > /mysol $ > {noformat} > and in another separate terminal: > {noformat} > $ docker exec -it thirsty_liskov bash > bash-4.4$ cat server/logs/solr-8983-console.log > Unrecognized option: --invalid > Error: Could not create the Java Virtual Machine. > Error: A fatal exception has occurred. Program will exit. > {noformat} > so it is saying Solr is running, when it isn't. > Now, all this can be
[jira] [Created] (SOLR-13089) bin/solr's use of lsof has some issues
Martijn Koster created SOLR-13089: - Summary: bin/solr's use of lsof has some issues Key: SOLR-13089 URL: https://issues.apache.org/jira/browse/SOLR-13089 Project: Solr Issue Type: Bug Security Level: Public (Default Security Level. Issues are Public) Components: SolrCLI Reporter: Martijn Koster The {{bin/solr}} script uses this {{lsof}} invocation to check if the Solr port is being listened on: {noformat} running=`lsof -PniTCP:$SOLR_PORT -sTCP:LISTEN` if [ -z "$running" ]; then {noformat} code is at [here|https://github.com/apache/lucene-solr/blob/master/solr/bin/solr#L2147]. There are a few issues with this. h2. 1. False negatives when port is occupied by different user When {{lsof}} runs as non-root, it only shows sockets for processes with your effective uid. For example: {noformat} $ id -u && nc -l 7788 & [1] 26576 1000 works: nc ran as my user $ lsof -PniTCP:7788 -sTCP:LISTEN COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME nc 26580 mak3u IPv4 2818104 0t0 TCP *:7788 (LISTEN) fails: ssh is running as root $ lsof -PniTCP:22 -sTCP:LISTEN works if we are root $ sudo lsof -PniTCP:22 -sTCP:LISTEN COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME sshd2524 root3u IPv4 18426 0t0 TCP *:22 (LISTEN) sshd2524 root4u IPv6 18428 0t0 TCP *:22 (LISTEN) {noformat} Solr runs as non-root. So if some other process owned by a different user occupies that port, you will get a false negative (it will say Solr is not running even though it is) I can't think of a good way to fix or work around that (short of not using {{lsof}} in the first place). Perhaps an uncommon scenario we need not worry too much about. h2. 2. 
lsof can complain about lack of /etc/password entries If {{lsof}} runs without the current effective user having an entry in {{/etc/passwd}}, it produces a warning on stderr: {noformat} $ docker run -d -u 0 solr:7.6.0 bash -c "chown -R /opt/; gosu solr-foreground" 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 $ docker exec -it -u 4397c3f51d4a1cfca7e5815e5b047f75fb144265d4582745a584f0dba51480c6 bash I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 -sTCP:LISTEN lsof: no pwd entry for UID COMMAND PID USER FD TYPE DEVICE SIZE/OFF NODE NAME lsof: no pwd entry for UID java 9 115u IPv4 2813503 0t0 TCP *:8983 (LISTEN) I have no name!@4397c3f51d4a:/opt/solr$ lsof -PniTCP:8983 -sTCP:LISTEN>/dev/null lsof: no pwd entry for UID lsof: no pwd entry for UID {noformat} You can avoid this by using the {{-t}} tag, which specifies that lsof should produce terse output with process identifiers only and no header: {noformat} I have no name!@4397c3f51d4a:/opt/solr$ lsof -t -PniTCP:8983 -sTCP:LISTEN 9 {noformat} This is a rare circumstance, but one I encountered and worked around. h2. 3. On Alpine, lsof is implemented by busybox, but with incompatible arguments On Alpine, {{busybox}} implements {{lsof}}, but does not support the arguments, so you get: {noformat} $ docker run -it alpine sh / # lsof -t -PniTCP:8983 -sTCP:LISTEN 1 /bin/busybox/dev/pts/0 1 /bin/busybox/dev/pts/0 1 /bin/busybox/dev/pts/0 1 /bin/busybox/dev/tty {noformat} so if you ran Solr, in the background, and it failed to start, this code would produce a false positive. For example: {noformat} docker volume create mysol docker run -v mysol:/mysol bash bash -c "chown 8983:8983 /mysol" docker run -it -v mysol:/mysol -w /mysol -v $HOME/Downloads/solr-7.6.0.tgz:/solr-7.6.0.tgz openjdk:8-alpine sh apk add procps bash tar xvzf /solr-7.6.0.tgz chown -R 8983:8983 . 
{noformat} then in a separate terminal: {noformat} $ docker exec -it -u 8983 serene_saha sh /mysol $ SOLR_OPTS=--invalid ./solr-7.6.0/bin/solr start whoami: unknown uid 8983 Waiting up to 180 seconds to see Solr running on port 8983 [|] Started Solr server on port 8983 (pid=101). Happy searching! /mysol $ {noformat} and in another separate terminal: {noformat} $ docker exec -it thirsty_liskov bash bash-4.4$ cat server/logs/solr-8983-console.log Unrecognized option: --invalid Error: Could not create the Java Virtual Machine. Error: A fatal exception has occurred. Program will exit. {noformat} so it is saying Solr is running, when it isn't. Now, all this can be avoided by just installing the real {{lsof}} with {{apk add lsof}} which works properly. So should we detect and warn? Or even refuse to run rather than invoke a tool that does not implement the contract we expect? h2. 4. Shellcheck dislikes backticks Shellcheck says {{SC2006: Use $(..) instead of legacy `..`.}} Now, shellcheck complains about 130 other issues too, so it's a drop in a bucket, but if we're changing things, might as well fix that. -- This message was sent by
[jira] [Commented] (LUCENE-8585) Create jump-tables for DocValues at index-time
[ https://issues.apache.org/jira/browse/LUCENE-8585?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726684#comment-16726684 ] Toke Eskildsen commented on LUCENE-8585: [~jpountz] A colleague suggested that we make the {{DENSE}} rank span adjustable or at least plan for it. Right now it is fixed as an entry for every 8 longs and as we discussed earlier, that is just a qualified guess as to what works best. If we record the span in {{meta}} (only accepting powers of 2 for clean shifts & masks) and hardcode it to 8 in the consumer for now, it is just a single {{short}} spend to allow for future tweaking, without having to change the underlying codec. > Create jump-tables for DocValues at index-time > -- > > Key: LUCENE-8585 > URL: https://issues.apache.org/jira/browse/LUCENE-8585 > Project: Lucene - Core > Issue Type: Improvement > Components: core/codecs >Affects Versions: master (8.0) >Reporter: Toke Eskildsen >Priority: Minor > Labels: performance > Attachments: LUCENE-8585.patch, LUCENE-8585.patch, > make_patch_lucene8585.sh > > Time Spent: 5h 10m > Remaining Estimate: 0h > > As noted in LUCENE-7589, lookup of DocValues should use jump-tables to avoid > long iterative walks. This is implemented in LUCENE-8374 at search-time > (first request for DocValues from a field in a segment), with the benefit of > working without changes to existing Lucene 7 indexes and the downside of > introducing a startup time penalty and a memory overhead. > As discussed in LUCENE-8374, the codec should be updated to create these > jump-tables at index time. This eliminates the segment-open time & memory > penalties, with the potential downside of increasing index-time for DocValues. 
> The three elements of LUCENE-8374 should be transferable to index-time > without much alteration of the core structures: > * {{IndexedDISI}} block offset and index skips: A {{long}} (64 bits) for > every 65536 documents, containing the offset of the block in 33 bits and the > index (number of set bits) up to the block in 31 bits. > It can be build sequentially and should be stored as a simple sequence of > consecutive longs for caching of lookups. > As it is fairly small, relative to document count, it might be better to > simply memory cache it? > * {{IndexedDISI}} DENSE (> 4095, < 65536 set bits) blocks: A {{short}} (16 > bits) for every 8 {{longs}} (512 bits) for a total of 256 bytes/DENSE_block. > Each {{short}} represents the number of set bits up to right before the > corresponding sub-block of 512 docIDs. > The \{{shorts}} can be computed sequentially or when the DENSE block is > flushed (probably the easiest). They should be stored as a simple sequence of > consecutive shorts for caching of lookups, one logically independent sequence > for each DENSE block. The logical position would be one sequence at the start > of every DENSE block. > Whether it is best to read all the 16 {{shorts}} up front when a DENSE block > is accessed or whether it is best to only read any individual {{short}} when > needed is not clear at this point. > * Variable Bits Per Value: A {{long}} (64 bits) for every 16384 numeric > values. Each {{long}} holds the offset to the corresponding block of values. > The offsets can be computed sequentially and should be stored as a simple > sequence of consecutive {{longs}} for caching of lookups. > The vBPV-offsets has the largest space overhead og the 3 jump-tables and a > lot of the 64 bits in each long are not used for most indexes. 
They could be > represented as a simple {{PackedInts}} sequence or {{MonotonicLongValues}}, > with the downsides of a potential lookup-time overhead and the need for doing > the compression after all offsets have been determined. > I have no experience with the codec-parts responsible for creating > index-structures. I'm quite willing to take a stab at this, although I > probably won't do much about it before January 2019. Should anyone else wish > to adopt this JIRA-issue or co-work on it, I'll be happy to share. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (LUCENE-8612) Add the ability to enforce gaps between intervals
[ https://issues.apache.org/jira/browse/LUCENE-8612?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726655#comment-16726655 ] Adrien Grand commented on LUCENE-8612: -- Looks good. I'm wondering whether we should return NO_MORE_INTERVALS-1 if end() + after is equal to NO_MORE_INTERVALS? > Add the ability to enforce gaps between intervals > - > > Key: LUCENE-8612 > URL: https://issues.apache.org/jira/browse/LUCENE-8612 > Project: Lucene - Core > Issue Type: Task >Reporter: Alan Woodward >Assignee: Alan Woodward >Priority: Major > Attachments: LUCENE-8612.patch, LUCENE-8612.patch, LUCENE-8612.patch > > > At the moment you can search for intervals with a maximum number of positions > between them, but you cannot enforce gaps. It would be useful to be able to > search for `a b [2 spaces] c`. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (LUCENE-8585) Create jump-tables for DocValues at index-time
[ https://issues.apache.org/jira/browse/LUCENE-8585?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726650#comment-16726650 ] Adrien Grand commented on LUCENE-8585: -- Thanks Toke, I'll have a look today. > Create jump-tables for DocValues at index-time > -- > > Key: LUCENE-8585 > URL: https://issues.apache.org/jira/browse/LUCENE-8585 > Project: Lucene - Core > Issue Type: Improvement > Components: core/codecs >Affects Versions: master (8.0) >Reporter: Toke Eskildsen >Priority: Minor > Labels: performance > Attachments: LUCENE-8585.patch, LUCENE-8585.patch, > make_patch_lucene8585.sh > > Time Spent: 5h 10m > Remaining Estimate: 0h > > As noted in LUCENE-7589, lookup of DocValues should use jump-tables to avoid > long iterative walks. This is implemented in LUCENE-8374 at search-time > (first request for DocValues from a field in a segment), with the benefit of > working without changes to existing Lucene 7 indexes and the downside of > introducing a startup time penalty and a memory overhead. > As discussed in LUCENE-8374, the codec should be updated to create these > jump-tables at index time. This eliminates the segment-open time & memory > penalties, with the potential downside of increasing index-time for DocValues. > The three elements of LUCENE-8374 should be transferable to index-time > without much alteration of the core structures: > * {{IndexedDISI}} block offset and index skips: A {{long}} (64 bits) for > every 65536 documents, containing the offset of the block in 33 bits and the > index (number of set bits) up to the block in 31 bits. > It can be build sequentially and should be stored as a simple sequence of > consecutive longs for caching of lookups. > As it is fairly small, relative to document count, it might be better to > simply memory cache it? > * {{IndexedDISI}} DENSE (> 4095, < 65536 set bits) blocks: A {{short}} (16 > bits) for every 8 {{longs}} (512 bits) for a total of 256 bytes/DENSE_block. 
> Each {{short}} represents the number of set bits up to right before the > corresponding sub-block of 512 docIDs. > The \{{shorts}} can be computed sequentially or when the DENSE block is > flushed (probably the easiest). They should be stored as a simple sequence of > consecutive shorts for caching of lookups, one logically independent sequence > for each DENSE block. The logical position would be one sequence at the start > of every DENSE block. > Whether it is best to read all the 16 {{shorts}} up front when a DENSE block > is accessed or whether it is best to only read any individual {{short}} when > needed is not clear at this point. > * Variable Bits Per Value: A {{long}} (64 bits) for every 16384 numeric > values. Each {{long}} holds the offset to the corresponding block of values. > The offsets can be computed sequentially and should be stored as a simple > sequence of consecutive {{longs}} for caching of lookups. > The vBPV-offsets has the largest space overhead og the 3 jump-tables and a > lot of the 64 bits in each long are not used for most indexes. They could be > represented as a simple {{PackedInts}} sequence or {{MonotonicLongValues}}, > with the downsides of a potential lookup-time overhead and the need for doing > the compression after all offsets has been determined. > I have no experience with the codec-parts responsible for creating > index-structures. I'm quite willing to take a stab at this, although I > probably won't do much about it before January 2019. Should anyone else wish > to adopt this JIRA-issue or co-work on it, I'll be happy to share. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[jira] [Commented] (SOLR-12120) New plugin type AuditLoggerPlugin
[ https://issues.apache.org/jira/browse/SOLR-12120?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726623#comment-16726623 ] Jan Høydahl commented on SOLR-12120: Pushed some new commits to [Pull Request #342|https://github.com/apache/lucene-solr/pull/342] * Addressing more of previous comments, such as test extending {{LuceneTestCase}}, see PR for details * Now AUTHENTICATED, ANONYMOUS and AUTHORIZED are not logged by default * Instead of {{auditIfConfigured(auditLoggerPlugin, auditEvent)}} we now do a much more lightweight check on {{shouldLog(eventType)}} before even creating the AuditEvent object * Configurable {{numThreads}} for the executorService of {{AsyncAuditLoggerPlugin}} * Catch and log exception from {{auditCallback}} to avoid background thread exiting on error Remaining before first commit * Integration test > New plugin type AuditLoggerPlugin > - > > Key: SOLR-12120 > URL: https://issues.apache.org/jira/browse/SOLR-12120 > Project: Solr > Issue Type: New Feature > Security Level: Public(Default Security Level. Issues are Public) > Components: security >Reporter: Jan Høydahl >Assignee: Jan Høydahl >Priority: Major > Time Spent: 3h > Remaining Estimate: 0h > > Solr needs a well defined plugin point to implement audit logging > functionality, which is independent from whatever {{AuthenticationPlugin}} or > {{AuthorizationPlugin}} are in use at the time. > It seems reasonable to introduce a new plugin type {{AuditLoggerPlugin}}. It > could be configured in solr.xml or it could be a third type of plugin defined > in {{security.json}}, i.e. > {code:java} > { > "authentication" : { "class" : ... }, > "authorization" : { "class" : ... }, > "auditlogging" : { "class" : "x.y.MyAuditLogger", ... 
} > } > {code} > We could then instrument SolrDispatchFilter to the audit plugin with an > AuditEvent at important points such as successful authentication: > {code:java} > auditLoggerPlugin.audit(new SolrAuditEvent(EventType.AUTHENTICATED, > request)); > {code} > We will mark the impl as {{@lucene.experimental}} in the first release to > let it settle as people write their own plugin implementations. -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[JENKINS] Lucene-Solr-7.6-Linux (64bit/jdk1.8.0_172) - Build # 129 - Unstable!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-7.6-Linux/129/ Java: 64bit/jdk1.8.0_172 -XX:-UseCompressedOops -XX:+UseSerialGC 3 tests failed. FAILED: org.apache.solr.cloud.LIRRollingUpdatesTest.testNewLeaderAndMixedReplicas Error Message: Timeout waiting for recovering null Live Nodes: [127.0.0.1:38615_solr, 127.0.0.1:43195_solr, 127.0.0.1:44141_solr] Last available state: DocCollection(testMixedReplicas-false//collections/testMixedReplicas-false/state.json/16)={ "pullReplicas":"0", "replicationFactor":"2", "shards":{"shard1":{ "range":"8000-7fff", "state":"active", "replicas":{ "core_node42":{ "core":"testMixedReplicas-false_shard1_replica_n41", "base_url":"http://127.0.0.1:38615/solr;, "node_name":"127.0.0.1:38615_solr", "state":"active", "type":"NRT", "force_set_state":"false", "leader":"true"}, "core_node44":{ "core":"testMixedReplicas-false_shard1_replica_n43", "base_url":"http://127.0.0.1:43195/solr;, "node_name":"127.0.0.1:43195_solr", "state":"recovering", "type":"NRT", "force_set_state":"false"}, "core_node46":{ "core":"testMixedReplicas-false_shard1_replica_n45", "base_url":"http://127.0.0.1:44141/solr;, "node_name":"127.0.0.1:44141_solr", "state":"active", "type":"NRT", "force_set_state":"false", "router":{"name":"compositeId"}, "maxShardsPerNode":"1", "autoAddReplicas":"false", "nrtReplicas":"2", "tlogReplicas":"0"} Stack Trace: java.lang.AssertionError: Timeout waiting for recovering null Live Nodes: [127.0.0.1:38615_solr, 127.0.0.1:43195_solr, 127.0.0.1:44141_solr] Last available state: DocCollection(testMixedReplicas-false//collections/testMixedReplicas-false/state.json/16)={ "pullReplicas":"0", "replicationFactor":"2", "shards":{"shard1":{ "range":"8000-7fff", "state":"active", "replicas":{ "core_node42":{ "core":"testMixedReplicas-false_shard1_replica_n41", "base_url":"http://127.0.0.1:38615/solr;, "node_name":"127.0.0.1:38615_solr", "state":"active", "type":"NRT", "force_set_state":"false", "leader":"true"}, "core_node44":{ 
"core":"testMixedReplicas-false_shard1_replica_n43", "base_url":"http://127.0.0.1:43195/solr;, "node_name":"127.0.0.1:43195_solr", "state":"recovering", "type":"NRT", "force_set_state":"false"}, "core_node46":{ "core":"testMixedReplicas-false_shard1_replica_n45", "base_url":"http://127.0.0.1:44141/solr;, "node_name":"127.0.0.1:44141_solr", "state":"active", "type":"NRT", "force_set_state":"false", "router":{"name":"compositeId"}, "maxShardsPerNode":"1", "autoAddReplicas":"false", "nrtReplicas":"2", "tlogReplicas":"0"} at __randomizedtesting.SeedInfo.seed([ABD4639B30FEDEDA:1EA38D000B9A8DBA]:0) at org.junit.Assert.fail(Assert.java:93) at org.apache.solr.cloud.SolrCloudTestCase.waitForState(SolrCloudTestCase.java:280) at org.apache.solr.cloud.LIRRollingUpdatesTest.testLeaderAndMixedReplicas(LIRRollingUpdatesTest.java:306) at org.apache.solr.cloud.LIRRollingUpdatesTest.testNewLeaderAndMixedReplicas(LIRRollingUpdatesTest.java:340) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1742) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:935) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:971) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:985) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at 
org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at
[jira] [Commented] (SOLR-13080) TermsQParserPlugin automaton method fails to sort input
[ https://issues.apache.org/jira/browse/SOLR-13080?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel=16726602#comment-16726602 ] Daniel Lowe commented on SOLR-13080: Thanks. I didn't know Lucene had its own timSort implementation, being consistent with how this is done elsewhere makes sense to me, it should be a simple change if in the future Lucene/Solr wants to use Java's implementation (LUCENE-7268) > TermsQParserPlugin automaton method fails to sort input > --- > > Key: SOLR-13080 > URL: https://issues.apache.org/jira/browse/SOLR-13080 > Project: Solr > Issue Type: Bug > Security Level: Public(Default Security Level. Issues are Public) > Components: query parsers >Affects Versions: 7.5 >Reporter: Daniel Lowe >Assignee: David Smiley >Priority: Minor > Attachments: SOLR-13080.patch > > > The contract for Automata.makeStringUnion is that the input is sorted. As > BytesRef implements Comparable. The simplest fix would probably be to make > Arrays.sort(bytesRefs); > The first line of automaton's makeFilter method in TermsQParserPlugin. > -- This message was sent by Atlassian JIRA (v7.6.3#76005) - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[GitHub] lucene-solr pull request #342: SOLR-12120: New AuditLoggerPlugin type allowi...
Github user janhoy commented on a diff in the pull request: https://github.com/apache/lucene-solr/pull/342#discussion_r243532741 --- Diff: solr/core/src/java/org/apache/solr/security/AuditEvent.java --- @@ -0,0 +1,388 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.security; + +import javax.servlet.http.HttpServletRequest; +import java.lang.invoke.MethodHandles; +import java.security.Principal; +import java.util.Date; +import java.util.Enumeration; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.stream.Collectors; + +import org.apache.solr.common.SolrException; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.solr.security.AuditEvent.EventType.ANONYMOUS; + +/** + * Audit event that takes request and auth context as input to be able to audit log custom things + */ +public class AuditEvent { + private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass()); + + private String message; + private Level level; + private Date date; + private String username; + private String session; + private String clientIp; + private List collections; + private Map context; + private HashMap headers; + private Map solrParams; + private String solrHost; + private int solrPort; + private String solrIp; + private String resource; + private String httpMethod; + private String queryString; + private EventType eventType; + private AuthorizationResponse autResponse; + private String requestType; + private double QTime = -1; + private int status = 0; + private Throwable exception; + + /* Predefined event types. 
Custom types can be made through constructor */ + public enum EventType { +AUTHENTICATED("Authenticated", "User successfully authenticated", Level.INFO), +REJECTED("Rejected", "Authentication request rejected", Level.WARN), +ANONYMOUS("Anonymous", "Request proceeds with unknown user", Level.INFO), +ANONYMOUS_REJECTED("AnonymousRejected", "Request from unknown user rejected", Level.WARN), +AUTHORIZED("Authorized", "Authorization succeeded", Level.INFO), +UNAUTHORIZED("Unauthorized", "Authorization failed", Level.WARN), +COMPLETED("Completed", "Request completed", Level.INFO), --- End diff -- In the latest commits I simplified this further. Now the default is to ever only generate events when the request is finished, not any intermediate ones, meaning that AUTHENTICATED, ANONYMOUS and AUTHORIZED are not logged by default. This saves some unnecessary object generation. Further, we now do a lightweight check on `shouldLog(eventType)` before even creating the AuditEvent object, which should further speed up things. --- - To unsubscribe, e-mail: dev-unsubscr...@lucene.apache.org For additional commands, e-mail: dev-h...@lucene.apache.org
[JENKINS] Lucene-Solr-NightlyTests-master - Build # 1728 - Still Failing
Build: https://builds.apache.org/job/Lucene-Solr-NightlyTests-master/1728/ 48 tests failed. FAILED: org.apache.solr.cloud.MultiThreadedOCPTest.test Error Message: acoll: 1545367628754 bcoll: 1545367628948 Stack Trace: java.lang.AssertionError: acoll: 1545367628754 bcoll: 1545367628948 at __randomizedtesting.SeedInfo.seed([CD6B8C8C6D70AF2A:453FB356C38CC2D2]:0) at org.junit.Assert.fail(Assert.java:88) at org.junit.Assert.assertTrue(Assert.java:41) at org.apache.solr.cloud.MultiThreadedOCPTest.testFillWorkQueue(MultiThreadedOCPTest.java:115) at org.apache.solr.cloud.MultiThreadedOCPTest.test(MultiThreadedOCPTest.java:70) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at com.carrotsearch.randomizedtesting.RandomizedRunner.invoke(RandomizedRunner.java:1750) at com.carrotsearch.randomizedtesting.RandomizedRunner$8.evaluate(RandomizedRunner.java:938) at com.carrotsearch.randomizedtesting.RandomizedRunner$9.evaluate(RandomizedRunner.java:974) at com.carrotsearch.randomizedtesting.RandomizedRunner$10.evaluate(RandomizedRunner.java:988) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsFixedStatement.callStatement(BaseDistributedSearchTestCase.java:1070) at org.apache.solr.BaseDistributedSearchTestCase$ShardsRepeatRule$ShardsStatement.evaluate(BaseDistributedSearchTestCase.java:1042) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.TestRuleSetupTeardownChained$1.evaluate(TestRuleSetupTeardownChained.java:49) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at org.apache.lucene.util.TestRuleThreadAndTestName$1.evaluate(TestRuleThreadAndTestName.java:48) at 
org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368) at com.carrotsearch.randomizedtesting.ThreadLeakControl.forkTimeoutingTask(ThreadLeakControl.java:817) at com.carrotsearch.randomizedtesting.ThreadLeakControl$3.evaluate(ThreadLeakControl.java:468) at com.carrotsearch.randomizedtesting.RandomizedRunner.runSingleTest(RandomizedRunner.java:947) at com.carrotsearch.randomizedtesting.RandomizedRunner$5.evaluate(RandomizedRunner.java:832) at com.carrotsearch.randomizedtesting.RandomizedRunner$6.evaluate(RandomizedRunner.java:883) at com.carrotsearch.randomizedtesting.RandomizedRunner$7.evaluate(RandomizedRunner.java:894) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.SystemPropertiesRestoreRule$1.evaluate(SystemPropertiesRestoreRule.java:57) at org.apache.lucene.util.AbstractBeforeAfterRule$1.evaluate(AbstractBeforeAfterRule.java:45) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleStoreClassName$1.evaluate(TestRuleStoreClassName.java:41) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.NoShadowingOrOverridesOnMethodsRule$1.evaluate(NoShadowingOrOverridesOnMethodsRule.java:40) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at 
com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at org.apache.lucene.util.TestRuleAssertionsRequired$1.evaluate(TestRuleAssertionsRequired.java:53) at org.apache.lucene.util.TestRuleMarkFailure$1.evaluate(TestRuleMarkFailure.java:47) at org.apache.lucene.util.TestRuleIgnoreAfterMaxFailures$1.evaluate(TestRuleIgnoreAfterMaxFailures.java:64) at org.apache.lucene.util.TestRuleIgnoreTestSuites$1.evaluate(TestRuleIgnoreTestSuites.java:54) at com.carrotsearch.randomizedtesting.rules.StatementAdapter.evaluate(StatementAdapter.java:36) at com.carrotsearch.randomizedtesting.ThreadLeakControl$StatementRunner.run(ThreadLeakControl.java:368)
[JENKINS] Lucene-Solr-7.x-Linux (64bit/jdk-10.0.1) - Build # 3244 - Failure!
Build: https://jenkins.thetaphi.de/job/Lucene-Solr-7.x-Linux/3244/ Java: 64bit/jdk-10.0.1 -XX:-UseCompressedOops -XX:+UseConcMarkSweepGC All tests passed Build Log: [...truncated 1960 lines...] [junit4] JVM J1: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/core/test/temp/junit4-J1-20181221_074035_65616630432756181685411.syserr [junit4] >>> JVM J1 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J1: EOF [...truncated 5 lines...] [junit4] JVM J2: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/core/test/temp/junit4-J2-20181221_074035_65612466312431501041003.syserr [junit4] >>> JVM J2 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J2: EOF [...truncated 5 lines...] [junit4] JVM J0: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/core/test/temp/junit4-J0-20181221_074035_6561793381562116606235.syserr [junit4] >>> JVM J0 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J0: EOF [...truncated 307 lines...] [junit4] JVM J1: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/test-framework/test/temp/junit4-J1-20181221_074735_2656982169765410822820.syserr [junit4] >>> JVM J1 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. 
[junit4] <<< JVM J1: EOF [junit4] JVM J0: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/test-framework/test/temp/junit4-J0-20181221_074735_26518111235607860544997.syserr [junit4] >>> JVM J0 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J0: EOF [junit4] JVM J2: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/test-framework/test/temp/junit4-J2-20181221_074735_26612412075401816415717.syserr [junit4] >>> JVM J2 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J2: EOF [...truncated 1080 lines...] [junit4] JVM J1: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/analysis/common/test/temp/junit4-J1-20181221_074850_7997540958006541351063.syserr [junit4] >>> JVM J1 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J1: EOF [...truncated 3 lines...] [junit4] JVM J0: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/analysis/common/test/temp/junit4-J0-20181221_074850_79912001817916307069225.syserr [junit4] >>> JVM J0 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J0: EOF [...truncated 3 lines...] 
[junit4] JVM J2: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/analysis/common/test/temp/junit4-J2-20181221_074850_7995112446093861340704.syserr [junit4] >>> JVM J2 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J2: EOF [...truncated 255 lines...] [junit4] JVM J0: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/analysis/icu/test/temp/junit4-J0-20181221_075042_78316903883596744563195.syserr [junit4] >>> JVM J0 emitted unexpected output (verbatim) [junit4] OpenJDK 64-Bit Server VM warning: Option UseConcMarkSweepGC was deprecated in version 9.0 and will likely be removed in a future release. [junit4] <<< JVM J0: EOF [...truncated 3 lines...] [junit4] JVM J2: stderr was not empty, see: /home/jenkins/workspace/Lucene-Solr-7.x-Linux/lucene/build/analysis/icu/test/temp/junit4-J2-20181221_075042_7834321971735643832178.syserr [junit4] >>> JVM J2 emitted unexpected