Manybubbles has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/207775

Change subject: Add field_value_factor_with_default
......................................................................

Add field_value_factor_with_default

This is a backport of an Elasticsearch feature that adds a "missing" parameter
to field_value_factor. That parameter supplies the value used by
field_value_factor when the document is missing the field.

Change-Id: Ic465b9fe88caf1ce6520f8b376956f2737695269
---
M README.md
A docs/field_value_factor_with_default.md
M src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
A src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefault.java
A src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultBuilder.java
A src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultParser.java
A src/main/java/org/wikimedia/search/extra/fieldvaluefactor/package-info.java
M src/main/java/org/wikimedia/search/extra/idhashmod/IdHashModFilter.java
A src/main/java/org/wikimedia/search/extra/idhashmod/package-info.java
A src/test/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorWithDefaultTest.java
10 files changed, 415 insertions(+), 5 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/search/extra refs/changes/75/207775/1

diff --git a/README.md b/README.md
index 3bacd1d..2947a8d 100644
--- a/README.md
+++ b/README.md
@@ -1,9 +1,9 @@
 Extra Queries and Filters
 =========================
 
-The plan is for this to include any extra queries, filters, and native scripts
-we end up creating to make search nice for Wikimedia.  At this point it only
-contains:
+The plan is for this to include any extra queries, filters, native scripts,
+score functions, and anything else we think we'll end up creating to make search
+nice for Wikimedia.  At this point it only contains:
 
 Filters:
 * [source_regex](docs/source_regex.md) - An nGram accelerated regular
@@ -12,7 +12,7 @@
 * [id_hash_mod](docs/id_hash_mod.md) - Filter used to select all documents
 independantly. For example, it can be used by multiple processes to reindex
 all documents without any interprocess communication. Added in 1.5.0, 1.4.1,
-and 1.3.1.
+and 1.3.0.
 
 Queries:
 * [safer](docs/safer.md) - Wraps other queries and analyzes them for
@@ -23,6 +23,12 @@
 * [super_detect_noop](docs/super_detect_noop.md) - Like ```detect_noop``` but
 supports configurable sloppiness. New in 1.5.0, 1.4.1, and 1.3.1.
 
+Score Functions:
+* [field_value_factor_with_default](docs/field_value_factor_with_default.md) -
+Just like field_value_factor except it supports a ```missing``` parameter that
+is the value used if the field is missing from the document being scored. Added
+in 1.5.0, 1.4.1, and 1.3.0.
+
 | Extra Queries and Filters Plugin |  ElasticSearch  |
 |----------------------------------|-----------------|
 | master                           | 1.3.4 -> 1.3.X  |
diff --git a/docs/field_value_factor_with_default.md b/docs/field_value_factor_with_default.md
new file mode 100644
index 0000000..a5e6101
--- /dev/null
+++ b/docs/field_value_factor_with_default.md
@@ -0,0 +1,7 @@
+field_value_factor_with_default
+===============================
+
+The ```field_value_factor_with_default``` is a backport of [an Elasticsearch feature](https://github.com/elastic/elasticsearch/issues/10841)
+that will be available in Elasticsearch 1.6.0 and 2.0.0 to support a
+```missing``` parameter that functions as a default value to use when scoring
+documents that are missing the field used to score the ```field_value_factor```.
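
As a usage sketch, assuming the plugin is installed: the builder added in this
change plugs into a function_score query from the Java API the same way the
integration test further down does. The index name "test", the field name
"popularity", and the class name here are placeholders, not part of the change.

    import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
    import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;

    import org.elasticsearch.action.search.SearchResponse;
    import org.elasticsearch.client.Client;
    import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
    import org.wikimedia.search.extra.fieldvaluefactor.FieldValueFactorFunctionWithDefaultBuilder;

    public class FieldValueFactorWithDefaultUsageSketch {
        /**
         * Scores every matched document by 1 / popularity; documents that have
         * no "popularity" field are scored as if it contained 100 instead of
         * failing the search.
         */
        public static SearchResponse search(Client client) {
            return client.prepareSearch("test")
                    .setQuery(functionScoreQuery(
                            matchAllQuery(),
                            new FieldValueFactorFunctionWithDefaultBuilder("popularity")
                                    .modifier(FieldValueFactorFunction.Modifier.RECIPROCAL)
                                    .missing(100)))
                    .get();
        }
    }
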
diff --git a/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java b/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
index 418ceca..9dc3c65 100644
--- a/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
+++ b/src/main/java/org/wikimedia/search/extra/ExtraPlugin.java
@@ -8,9 +8,11 @@
 import org.elasticsearch.common.inject.multibindings.Multibinder;
 import org.elasticsearch.common.settings.Settings;
 import org.elasticsearch.index.query.QueryParser;
+import org.elasticsearch.index.query.functionscore.FunctionScoreModule;
 import org.elasticsearch.indices.query.IndicesQueriesModule;
 import org.elasticsearch.plugins.AbstractPlugin;
 import org.elasticsearch.script.ScriptModule;
+import org.wikimedia.search.extra.fieldvaluefactor.FieldValueFactorFunctionWithDefaultParser;
 import org.wikimedia.search.extra.idhashmod.IdHashModFilterParser;
 import org.wikimedia.search.extra.regex.SourceRegexFilterParser;
 import org.wikimedia.search.extra.safer.ActionModuleParser;
@@ -53,6 +55,13 @@
         module.registerScript("super_detect_noop", SuperDetectNoopScript.Factory.class);
     }
 
+    /**
+     * Register our function scores.
+     */
+    public void onModule(FunctionScoreModule module) {
+        module.registerParser(FieldValueFactorFunctionWithDefaultParser.class);
+    }
+
     @Override
     public Collection<Class<? extends Module>> modules() {
         return ImmutableList.<Class<? extends Module>> of(SafeifierActionsModule.class, CloseEnoughDetectorsModule.class);
diff --git a/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefault.java b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefault.java
new file mode 100644
index 0000000..86a4f7e
--- /dev/null
+++ b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefault.java
@@ -0,0 +1,150 @@
+package org.wikimedia.search.extra.fieldvaluefactor;
+
+import java.util.Locale;
+
+import org.apache.lucene.index.AtomicReaderContext;
+import org.apache.lucene.search.Explanation;
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.lucene.search.function.CombineFunction;
+import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
+import org.elasticsearch.common.lucene.search.function.ScoreFunction;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.fielddata.SortedNumericDoubleValues;
+
+/**
+ * Implements field_value_factor_with_default. Basically a copy of Elasticsearch's
+ * FieldValueFactorParser in 1.4 with
+ * https://github.com/elastic/elasticsearch/pull/10845 applied.
+ */
+public class FieldValueFactorFunctionWithDefault extends ScoreFunction {
+    private final String field;
+    private final float boostFactor;
+    private final FieldValueFactorFunction.Modifier modifier;
+    private final Double missing;
+    private final IndexNumericFieldData indexFieldData;
+    private SortedNumericDoubleValues values;
+
+    public FieldValueFactorFunctionWithDefault(String field, float boostFactor, FieldValueFactorFunction.Modifier modifierType,
+            Double missing, IndexNumericFieldData indexFieldData) {
+        super(CombineFunction.MULT);
+        this.field = field;
+        this.boostFactor = boostFactor;
+        this.modifier = modifierType;
+        this.missing = missing;
+        this.indexFieldData = indexFieldData;
+    }
+
+    @Override
+    public void setNextReader(AtomicReaderContext context) {
+        this.values = this.indexFieldData.load(context).getDoubleValues();
+    }
+
+    @Override
+    public double score(int docId, float subQueryScore) {
+        this.values.setDocument(docId);
+        final int numValues = this.values.count();
+        double value;
+        if (numValues > 0) {
+            value = this.values.valueAt(0);
+        } else if (missing != null) {
+            value = missing;
+        } else {
+            throw new ElasticsearchException("Missing value for field [" + field + "]");
+        }
+        double val = value * boostFactor;
+        double result = modifier.apply(val);
+        if (Double.isNaN(result) || Double.isInfinite(result)) {
+            throw new ElasticsearchException("Result of field modification [" + modifier.toString() + "(" + val + ")] must be a number");
+        }
+        return result;
+    }
+
+    @Override
+    public Explanation explainScore(int docId, float subQueryScore) {
+        Explanation exp = new Explanation();
+        String modifierStr = modifier != null ? modifier.toString() : "";
+        String defaultStr = missing != null ? "?:" + missing : "";
+        double score = score(docId, subQueryScore);
+        exp.setValue(CombineFunction.toFloat(score));
+        exp.setDescription(String.format(Locale.ROOT, "field value function: %s(doc['%s'].value%s * factor=%s)", modifierStr, field,
+                defaultStr, boostFactor));
+        return exp;
+    }
+
+    /**
+     * The Type class encapsulates the modification types that can be applied to
+     * the score/value product.
+     */
+    public enum Modifier {
+        NONE {
+            @Override
+            public double apply(double n) {
+                return n;
+            }
+        },
+        LOG {
+            @Override
+            public double apply(double n) {
+                return Math.log10(n);
+            }
+        },
+        LOG1P {
+            @Override
+            public double apply(double n) {
+                return Math.log10(n + 1);
+            }
+        },
+        LOG2P {
+            @Override
+            public double apply(double n) {
+                return Math.log10(n + 2);
+            }
+        },
+        LN {
+            @Override
+            public double apply(double n) {
+                return Math.log(n);
+            }
+        },
+        LN1P {
+            @Override
+            public double apply(double n) {
+                return Math.log1p(n);
+            }
+        },
+        LN2P {
+            @Override
+            public double apply(double n) {
+                return Math.log1p(n + 1);
+            }
+        },
+        SQUARE {
+            @Override
+            public double apply(double n) {
+                return Math.pow(n, 2);
+            }
+        },
+        SQRT {
+            @Override
+            public double apply(double n) {
+                return Math.sqrt(n);
+            }
+        },
+        RECIPROCAL {
+            @Override
+            public double apply(double n) {
+                return 1.0 / n;
+            }
+        };
+
+        public abstract double apply(double n);
+
+        @Override
+        public String toString() {
+            if (this == NONE) {
+                return "";
+            }
+            return super.toString().toLowerCase(Locale.ROOT);
+        }
+    }
+}
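
To illustrate the arithmetic in score() above with made-up numbers: for a
document that has no value for the field, the configured missing value stands
in, so with missing = 100, factor = 0.5 and the ln1p modifier the score is
modifier.apply(100 * 0.5) = Math.log1p(50), roughly 3.93. A standalone sketch
of that calculation (all values are hypothetical):

    import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;

    public class ScoreArithmeticSketch {
        public static void main(String[] args) {
            // The document has no value for the field, so the configured
            // "missing" default stands in for it.
            double missing = 100;
            float factor = 0.5f;
            FieldValueFactorFunction.Modifier modifier = FieldValueFactorFunction.Modifier.LN1P;

            // Same formula as FieldValueFactorFunctionWithDefault.score():
            // modifier.apply(value * boostFactor).
            double result = modifier.apply(missing * factor);
            System.out.println(result); // ~3.93, i.e. ln(1 + 50)
        }
    }
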
diff --git a/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultBuilder.java b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultBuilder.java
new file mode 100644
index 0000000..5b72b0c
--- /dev/null
+++ b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultBuilder.java
@@ -0,0 +1,66 @@
+package org.wikimedia.search.extra.fieldvaluefactor;
+
+import java.io.IOException;
+import java.util.Locale;
+
+import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
+import org.elasticsearch.common.xcontent.XContentBuilder;
+import org.elasticsearch.index.query.functionscore.ScoreFunctionBuilder;
+
+/**
+ * Builds field_value_factor_with_default. Basically a copy of Elasticsearch's
+ * FieldValueFactorParser in 1.4 with
+ * https://github.com/elastic/elasticsearch/pull/10845 applied.
+ */
+public class FieldValueFactorFunctionWithDefaultBuilder extends ScoreFunctionBuilder {
+    private String field = null;
+    private Float factor = null;
+    private FieldValueFactorFunction.Modifier modifier = null;
+    private Double missing = null;
+
+    public FieldValueFactorFunctionWithDefaultBuilder(String fieldName) {
+        this.field = fieldName;
+    }
+
+    @Override
+    public String getName() {
+        return FieldValueFactorFunctionWithDefaultParser.NAMES[0];
+    }
+
+    public FieldValueFactorFunctionWithDefaultBuilder factor(float boostFactor) {
+        this.factor = boostFactor;
+        return this;
+    }
+
+    public FieldValueFactorFunctionWithDefaultBuilder modifier(FieldValueFactorFunction.Modifier modifier) {
+        this.modifier = modifier;
+        return this;
+    }
+
+    public FieldValueFactorFunctionWithDefaultBuilder missing(double missing) {
+        this.missing = missing;
+        return this;
+    }
+
+    @Override
+    public void doXContent(XContentBuilder builder, Params params) throws IOException {
+        builder.startObject(getName());
+        if (field != null) {
+            builder.field("field", field);
+        }
+
+        if (factor != null) {
+            builder.field("factor", factor);
+        }
+
+        if (modifier != null) {
+            builder.field("modifier", modifier.toString().toLowerCase(Locale.ROOT));
+        }
+
+        if (missing != null) {
+            builder.field("missing", missing);
+        }
+
+        builder.endObject();
+    }
+}
diff --git a/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultParser.java b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultParser.java
new file mode 100644
index 0000000..8e65667
--- /dev/null
+++ b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorFunctionWithDefaultParser.java
@@ -0,0 +1,70 @@
+package org.wikimedia.search.extra.fieldvaluefactor;
+
+import java.io.IOException;
+import java.util.Locale;
+
+import org.elasticsearch.ElasticsearchException;
+import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
+import org.elasticsearch.common.lucene.search.function.ScoreFunction;
+import org.elasticsearch.common.xcontent.XContentParser;
+import org.elasticsearch.index.fielddata.IndexNumericFieldData;
+import org.elasticsearch.index.mapper.FieldMapper;
+import org.elasticsearch.index.query.QueryParseContext;
+import org.elasticsearch.index.query.QueryParsingException;
+import org.elasticsearch.index.query.functionscore.ScoreFunctionParser;
+import org.elasticsearch.search.internal.SearchContext;
+
+/**
+ * Parses field_value_factor_with_default. Basically a copy of Elasticsearch's
+ * FieldValueFactorParser in 1.4 with
+ * https://github.com/elastic/elasticsearch/pull/10845 applied.
+ */
+public class FieldValueFactorFunctionWithDefaultParser implements ScoreFunctionParser {
+    public static String[] NAMES = { "field_value_factor_with_default", "fieldValueFactorWithDefault" };
+
+    @Override
+    public ScoreFunction parse(QueryParseContext parseContext, XContentParser parser) throws IOException, QueryParsingException {
+
+        String currentFieldName = null;
+        String field = null;
+        float boostFactor = 1;
+        FieldValueFactorFunction.Modifier modifier = FieldValueFactorFunction.Modifier.NONE;
+        Double missing = null;
+        XContentParser.Token token;
+        while ((token = parser.nextToken()) != XContentParser.Token.END_OBJECT) {
+            if (token == XContentParser.Token.FIELD_NAME) {
+                currentFieldName = parser.currentName();
+            } else if (token.isValue()) {
+                if ("field".equals(currentFieldName)) {
+                    field = parser.text();
+                } else if ("factor".equals(currentFieldName)) {
+                    boostFactor = parser.floatValue();
+                } else if ("modifier".equals(currentFieldName)) {
+                    modifier = FieldValueFactorFunction.Modifier.valueOf(parser.text().toUpperCase(Locale.ROOT));
+                } else if ("missing".equals(currentFieldName)) {
+                    missing = parser.doubleValue();
+                } else {
+                    throw new QueryParsingException(parseContext.index(), NAMES[0] + " query does not support [" + currentFieldName + "]");
+                }
+            }
+        }
+
+        if (field == null) {
+            throw new QueryParsingException(parseContext.index(), "[" + NAMES[0] + "] required field 'field' missing");
+        }
+
+        SearchContext searchContext = SearchContext.current();
+        @SuppressWarnings("rawtypes")
+        FieldMapper mapper = searchContext.mapperService().smartNameFieldMapper(field);
+        if (mapper == null) {
+            throw new ElasticsearchException("Unable to find a field mapper for field [" + field + "]");
+        }
+        return new FieldValueFactorFunctionWithDefault(field, boostFactor, modifier, missing, (IndexNumericFieldData) searchContext.fieldData()
+                .getForField(mapper));
+    }
+
+    @Override
+    public String[] getNames() {
+        return NAMES;
+    }
+}
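
For reference, the object this parser consumes carries at most the four keys
checked above: field (required), factor, modifier and missing. A rough sketch
of building an equivalent fragment with XContentBuilder, using a placeholder
field name:

    import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;

    import java.io.IOException;

    import org.elasticsearch.common.xcontent.XContentBuilder;

    public class ParserInputSketch {
        /** Body of a "field_value_factor_with_default" function inside function_score. */
        public static XContentBuilder fragment() throws IOException {
            return jsonBuilder().startObject()
                    .field("field", "popularity") // required; omitting it is a QueryParsingException
                    .field("factor", 1.2f)        // optional, defaults to 1
                    .field("modifier", "ln1p")    // optional, defaults to none
                    .field("missing", 100)        // optional default for documents without the field
                    .endObject();
        }
    }
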
diff --git a/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/package-info.java b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/package-info.java
new file mode 100644
index 0000000..695c1da
--- /dev/null
+++ b/src/main/java/org/wikimedia/search/extra/fieldvaluefactor/package-info.java
@@ -0,0 +1,5 @@
+/**
+ * Default value support for field_value_factor, named field_value_factor_with_default.
+ * Backport of https://github.com/elastic/elasticsearch/pull/10845.
+ */
+package org.wikimedia.search.extra.fieldvaluefactor;
\ No newline at end of file
diff --git a/src/main/java/org/wikimedia/search/extra/idhashmod/IdHashModFilter.java b/src/main/java/org/wikimedia/search/extra/idhashmod/IdHashModFilter.java
index 59648a4..5555eeb 100644
--- a/src/main/java/org/wikimedia/search/extra/idhashmod/IdHashModFilter.java
+++ b/src/main/java/org/wikimedia/search/extra/idhashmod/IdHashModFilter.java
@@ -13,7 +13,7 @@
 import org.elasticsearch.index.fielddata.ScriptDocValues;
 
 /**
- * Filters to fields who's _uid's hash matches a number mod some other number.
+ * Filters to documents whose _uid's hash matches a number mod some other number.
  * Its a simple way of slicing the index into chunks that can be processed
  * totally independently. Its used by CirrusSearch to reindex in multiple
  * Independent processes. Its the same as the following script:
diff --git a/src/main/java/org/wikimedia/search/extra/idhashmod/package-info.java b/src/main/java/org/wikimedia/search/extra/idhashmod/package-info.java
new file mode 100644
index 0000000..fb0c94c
--- /dev/null
+++ b/src/main/java/org/wikimedia/search/extra/idhashmod/package-info.java
@@ -0,0 +1,6 @@
+/**
+ * Filters to documents whose _uid's hash matches a number mod some other number.
+ * Its a simple way of slicing the index into chunks that can be processed
+ * totally independently.
+ */
+package org.wikimedia.search.extra.idhashmod;
\ No newline at end of file
diff --git a/src/test/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorWithDefaultTest.java b/src/test/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorWithDefaultTest.java
new file mode 100644
index 0000000..5284624
--- /dev/null
+++ b/src/test/java/org/wikimedia/search/extra/fieldvaluefactor/FieldValueFactorWithDefaultTest.java
@@ -0,0 +1,91 @@
+package org.wikimedia.search.extra.fieldvaluefactor;
+
+import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
+import static org.elasticsearch.index.query.QueryBuilders.functionScoreQuery;
+import static org.elasticsearch.index.query.QueryBuilders.matchAllQuery;
+import static org.elasticsearch.index.query.QueryBuilders.simpleQueryString;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertAcked;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertFailures;
+import static org.elasticsearch.test.hamcrest.ElasticsearchAssertions.assertOrderedSearchHits;
+
+import java.io.IOException;
+
+import org.elasticsearch.action.search.SearchPhaseExecutionException;
+import org.elasticsearch.action.search.SearchResponse;
+import org.elasticsearch.common.lucene.search.function.FieldValueFactorFunction;
+import org.junit.Test;
+import org.wikimedia.search.extra.AbstractPluginIntegrationTest;
+
+/**
+ * Tests field_value_factor_with_default. Basically a copy of Elasticsearch's
+ * FunctionScoreFieldValueTests with
+ * https://github.com/elastic/elasticsearch/pull/10845 applied.
+ */
+public class FieldValueFactorWithDefaultTest extends AbstractPluginIntegrationTest {
+    @Test
+    public void testFieldValueFactor() throws IOException {
+        assertAcked(prepareCreate("test").addMapping(
+                "type1",
+                jsonBuilder().startObject().startObject("type1").startObject("properties").startObject("test")
+                        .field("type", randomFrom(new String[] { "short", "float", "long", "integer", "double" })).endObject()
+                        .startObject("body").field("type", "string").endObject().endObject().endObject().endObject()).get());
+        ensureYellow();
+
+        client().prepareIndex("test", "type1", "1").setSource("test", 5, "body", "foo").get();
+        client().prepareIndex("test", "type1", "2").setSource("test", 17, "body", "foo").get();
+        client().prepareIndex("test", "type1", "3").setSource("body", "bar").get();
+
+        refresh();
+
+        // document 2 scores higher because 17 > 5
+        SearchResponse response = client().prepareSearch("test").setExplain(randomBoolean())
+                .setQuery(functionScoreQuery(simpleQueryString("foo"), new FieldValueFactorFunctionWithDefaultBuilder("test"))).get();
+        assertOrderedSearchHits(response, "2", "1");
+
+        // document 1 scores higher because 1/5 > 1/17
+        response = client()
+                .prepareSearch("test")
+                .setExplain(randomBoolean())
+                .setQuery(
+                        functionScoreQuery(simpleQueryString("foo"), new FieldValueFactorFunctionWithDefaultBuilder("test")
+                                .modifier(FieldValueFactorFunction.Modifier.RECIPROCAL))).get();
+        assertOrderedSearchHits(response, "1", "2");
+
+        // doc 3 doesn't have a "test" field, so an exception will be thrown
+        try {
+            response = client().prepareSearch("test").setExplain(randomBoolean())
+                    .setQuery(functionScoreQuery(matchAllQuery(), new FieldValueFactorFunctionWithDefaultBuilder("test"))).get();
+            assertFailures(response);
+        } catch (SearchPhaseExecutionException e) {
+            // We are expecting an exception, because 3 has no field
+        }
+
+        // doc 3 doesn't have a "test" field but we're defaulting it to 100 so
+        // it should be last
+        response = client()
+                .prepareSearch("test")
+                .setExplain(randomBoolean())
+                .setQuery(
+                        functionScoreQuery(
+                                matchAllQuery(),
+                                new FieldValueFactorFunctionWithDefaultBuilder("test").modifier(
+                                        FieldValueFactorFunction.Modifier.RECIPROCAL).missing(100))).get();
+        assertOrderedSearchHits(response, "1", "2", "3");
+
+        // n divided by 0 is infinity, which should provoke an exception.
+        try {
+            response = client()
+                    .prepareSearch("test")
+                    .setExplain(randomBoolean())
+                    .setQuery(
+                            functionScoreQuery(
+                                    simpleQueryString("foo"),
+                                    new FieldValueFactorFunctionWithDefaultBuilder("test").modifier(
+                                            FieldValueFactorFunction.Modifier.RECIPROCAL).factor(0))).get();
+            assertFailures(response);
+        } catch (SearchPhaseExecutionException e) {
+            // This is fine, the query will throw an exception if executed
+            // locally, instead of just having failures
+        }
+    }
+}

-- 
To view, visit https://gerrit.wikimedia.org/r/207775
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Ic465b9fe88caf1ce6520f8b376956f2737695269
Gerrit-PatchSet: 1
Gerrit-Project: search/extra
Gerrit-Branch: 1.3
Gerrit-Owner: Manybubbles <never...@wikimedia.org>
