DCausse has uploaded a new change for review. ( 
https://gerrit.wikimedia.org/r/328644 )

Change subject: Workaround timeout limitation
......................................................................

Workaround timeout limitation

Timeout checks are passive with Lucene, it means that the timeout is
only checked when a doc matches or when we switch to a new segment.
The problem is that the timeout can be very inaccurate when scanning a
large segment where no docs match.
Workaround this issue by a horrible hack:
- inspect elastic SearchContect.current() to determine if a timeout is
  used
- Build a fake collector to be able to throw TimeExceededException
  (private constructor)

This hack will be certainly hard to port to future elastic version
(esp. if the SearchContext becomes hard to access at this stage)

Bug: T152895
Change-Id: I1ce0378e475bf5c122cb8586916722992f82bed3
---
M 
src/main/java/org/wikimedia/search/extra/regex/AcceleratedSourceRegexQuery.java
M 
src/main/java/org/wikimedia/search/extra/regex/UnacceleratedSourceRegexQuery.java
M src/test/java/org/wikimedia/search/extra/regex/SourceRegexQueryTest.java
3 files changed, 68 insertions(+), 11 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/search/extra 
refs/changes/44/328644/1

diff --git 
a/src/main/java/org/wikimedia/search/extra/regex/AcceleratedSourceRegexQuery.java
 
b/src/main/java/org/wikimedia/search/extra/regex/AcceleratedSourceRegexQuery.java
index 097b2ad..aae2e77 100644
--- 
a/src/main/java/org/wikimedia/search/extra/regex/AcceleratedSourceRegexQuery.java
+++ 
b/src/main/java/org/wikimedia/search/extra/regex/AcceleratedSourceRegexQuery.java
@@ -44,6 +44,7 @@
             // TODO: Get rid of this shared mutable state, we should be able 
to use
             // the generic timeout system.
             private final MutableValueInt inspected = new MutableValueInt();
+            private final TimeoutChecker timeoutChecker = new TimeoutChecker();
 
             @Override
             public Scorer scorer(final LeafReaderContext context) throws 
IOException {
@@ -51,7 +52,8 @@
                 if(approxScorer == null) {
                     return null;
                 }
-                return new ConstantScoreScorer(this, 1f, new 
RegexTwoPhaseIterator(approxScorer.iterator(), context, inspected));
+                timeoutChecker.nextSegment(context);
+                return new ConstantScoreScorer(this, 1f, new 
RegexTwoPhaseIterator(approxScorer.iterator(), context, inspected, 
timeoutChecker));
             }
         };
     }
diff --git 
a/src/main/java/org/wikimedia/search/extra/regex/UnacceleratedSourceRegexQuery.java
 
b/src/main/java/org/wikimedia/search/extra/regex/UnacceleratedSourceRegexQuery.java
index 0dee8aa..7968a3b 100644
--- 
a/src/main/java/org/wikimedia/search/extra/regex/UnacceleratedSourceRegexQuery.java
+++ 
b/src/main/java/org/wikimedia/search/extra/regex/UnacceleratedSourceRegexQuery.java
@@ -5,15 +5,10 @@
 
 import lombok.EqualsAndHashCode;
 import org.apache.lucene.index.LeafReaderContext;
-import org.apache.lucene.search.ConstantScoreScorer;
-import org.apache.lucene.search.ConstantScoreWeight;
-import org.apache.lucene.search.DocIdSetIterator;
-import org.apache.lucene.search.IndexSearcher;
-import org.apache.lucene.search.Query;
-import org.apache.lucene.search.Scorer;
-import org.apache.lucene.search.TwoPhaseIterator;
-import org.apache.lucene.search.Weight;
+import org.apache.lucene.search.*;
 import org.apache.lucene.util.mutable.MutableValueInt;
+import org.elasticsearch.search.SearchService;
+import org.elasticsearch.search.internal.SearchContext;
 import org.wikimedia.search.extra.regex.SourceRegexQuery.Rechecker;
 import org.wikimedia.search.extra.regex.SourceRegexQuery.Settings;
 import org.wikimedia.search.extra.util.FieldValues;
@@ -56,9 +51,11 @@
             // TODO: Get rid of this shared mutable state, we should be able 
to use
             // the generic timeout system.
             private final MutableValueInt inspected = new MutableValueInt();
+            private final TimeoutChecker timeoutChecker = new TimeoutChecker();
 
             @Override
             public Scorer scorer(final LeafReaderContext context) throws 
IOException {
+                timeoutChecker.nextSegment(context);
                 // We can stop matching early if we are allowed to inspect less
                 // doc than the number of docs available in this segment.
                 // This is because we use a DocIdSetIterator.all.
@@ -66,25 +63,29 @@
                 if(remaining < 0) {
                     remaining = 0;
                 }
+                context.reader().getLiveDocs();
                 int maxDoc = remaining > context.reader().maxDoc() ? 
context.reader().maxDoc() : remaining;
                 final DocIdSetIterator approximation = 
DocIdSetIterator.all(maxDoc);
-                return new ConstantScoreScorer(this, 1f, new 
RegexTwoPhaseIterator(approximation, context, inspected));
+                return new ConstantScoreScorer(this, 1f, new 
RegexTwoPhaseIterator(approximation, context, inspected, timeoutChecker));
             }
         };
     }
 
     protected class RegexTwoPhaseIterator extends TwoPhaseIterator {
         private final LeafReaderContext context;
+        private final TimeoutChecker timeoutChecker;
         private MutableValueInt inspected;
 
-        protected RegexTwoPhaseIterator(DocIdSetIterator approximation, 
LeafReaderContext context, MutableValueInt inspected) {
+        protected RegexTwoPhaseIterator(DocIdSetIterator approximation, 
LeafReaderContext context, MutableValueInt inspected, TimeoutChecker 
timeoutChecker) {
             super(approximation);
             this.context = context;
             this.inspected = inspected;
+            this.timeoutChecker = timeoutChecker;
         }
 
         @Override
         public boolean matches() throws IOException {
+            timeoutChecker.check(approximation.docID());
             if (inspected.value >= settings.getMaxInspect()) {
                 return false;
             }
@@ -105,4 +106,36 @@
         }
     }
 
+    /**
+     * Horrible hack to workaround the fact that 
TimeLimitingCollector.TimeExceededException has a private ctor
+     * FIXME: find proper solutions to handle timeouts
+     */
+    protected static class TimeoutChecker {
+        private LeafCollector collector;
+        private final Collector topCollector;
+        public TimeoutChecker() throws IOException {
+            long timeout = SearchContext.current().timeoutInMillis();
+            if(timeout != SearchService.NO_TIMEOUT.millis()) {
+                topCollector = new TimeLimitingCollector(new NullCollector(), 
SearchContext.current().timeEstimateCounter(), timeout);
+            } else {
+                topCollector = new NullCollector();
+            }
+        }
+        public void check(int docId) throws IOException {
+            collector.collect(docId);
+        }
+
+        public void nextSegment(LeafReaderContext context) throws IOException {
+            collector = topCollector.getLeafCollector(context);
+        }
+
+        private static class NullCollector extends SimpleCollector {
+            @Override
+            public void collect(int doc) throws IOException {}
+            @Override
+            public boolean needsScores() {
+                return true;
+            }
+        }
+    }
 }
diff --git 
a/src/test/java/org/wikimedia/search/extra/regex/SourceRegexQueryTest.java 
b/src/test/java/org/wikimedia/search/extra/regex/SourceRegexQueryTest.java
index 5ae46c9..283572a 100644
--- a/src/test/java/org/wikimedia/search/extra/regex/SourceRegexQueryTest.java
+++ b/src/test/java/org/wikimedia/search/extra/regex/SourceRegexQueryTest.java
@@ -20,6 +20,7 @@
 import org.elasticsearch.action.search.SearchResponse;
 import org.elasticsearch.common.xcontent.XContentBuilder;
 import org.elasticsearch.rest.RestStatus;
+import org.junit.Assert;
 import org.junit.Test;
 import org.wikimedia.search.extra.AbstractPluginIntegrationTest;
 
@@ -155,6 +156,27 @@
     }
 
     @Test
+    public void testTimeout() throws InterruptedException, ExecutionException,
+            IOException {
+        setup();
+        indexRandom(false, doc("findmefound0", "findmefound"));
+        for(int i = 1; i < 10000; i++) {
+            indexRandom(false, doc("findmenotfound"+i, "findmenotfound "));
+            indexRandom(false, doc("findmefound"+i, "findmenot"));
+        }
+        
client().admin().indices().prepareForceMerge("test").setMaxNumSegments(1).get();
+        refresh();
+
+        // warmup
+        SearchResponse resp = search(filter("findme")).get();
+        Assert.assertTrue(resp.getHits().getTotalHits() > 0);
+
+        resp = 
search(filter("(((f.){1,20}n.){1,20}m.){1,20}")).setSize(10).setTimeout("500ms").get();
+        Assert.assertTrue(resp.getHits().getTotalHits() > 0);
+        Assert.assertTrue(resp.isTimedOut());
+    }
+
+    @Test
     public void caseInsensitiveMatching() throws InterruptedException, 
ExecutionException, IOException {
         setup();
         indexRandom(true, doc("findme", "I have the test in me."));

-- 
To view, visit https://gerrit.wikimedia.org/r/328644
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I1ce0378e475bf5c122cb8586916722992f82bed3
Gerrit-PatchSet: 1
Gerrit-Project: search/extra
Gerrit-Branch: master
Gerrit-Owner: DCausse <dcau...@wikimedia.org>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to