tomglk commented on a change in pull request #123:
URL: https://github.com/apache/solr/pull/123#discussion_r633062781



##########
File path: 
solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
##########
@@ -57,50 +67,79 @@ public void setField(String field) {
   }
 
   @Override
-  public LinkedHashMap<String,Object> paramsToMap() {
-    final LinkedHashMap<String,Object> params = defaultParamsToMap();
+  public LinkedHashMap<String, Object> paramsToMap() {
+    final LinkedHashMap<String, Object> params = defaultParamsToMap();
     params.put("field", field);
     return params;
   }
 
   @Override
   protected void validate() throws FeatureException {
     if (field == null || field.isEmpty()) {
-      throw new FeatureException(getClass().getSimpleName()+
-          ": field must be provided");
+      throw new FeatureException(getClass().getSimpleName() + ": field must be 
provided");
     }
   }
 
-  public FieldValueFeature(String name, Map<String,Object> params) {
+  public FieldValueFeature(String name, Map<String, Object> params) {
     super(name, params);
   }
 
   @Override
-  public FeatureWeight createWeight(IndexSearcher searcher, boolean 
needsScores,
-      SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi)
-          throws IOException {
+  public FeatureWeight createWeight(IndexSearcher searcher, boolean 
needsScores, SolrQueryRequest request,
+                                    Query originalQuery, Map<String, String[]> 
efi) throws IOException {
     return new FieldValueFeatureWeight(searcher, request, originalQuery, efi);
   }
 
   public class FieldValueFeatureWeight extends FeatureWeight {
+    private final SchemaField schemaField;
 
     public FieldValueFeatureWeight(IndexSearcher searcher,
-        SolrQueryRequest request, Query originalQuery, Map<String,String[]> 
efi) {
+                                   SolrQueryRequest request, Query 
originalQuery, Map<String, String[]> efi) {
       super(FieldValueFeature.this, searcher, request, originalQuery, efi);
+      if (searcher instanceof SolrIndexSearcher) {
+        schemaField = ((SolrIndexSearcher) 
searcher).getSchema().getFieldOrNull(field);
+      } else {
+        schemaField = null;
+      }
     }
 
+    /**
+     * Return a FeatureScorer that uses docValues or storedFields if no 
docValues are present
+     *
+     * @param context the segment this FeatureScorer is working with
+     * @return FeatureScorer for the current segment and field
+     * @throws IOException as defined by abstract class Feature
+     */
     @Override
     public FeatureScorer scorer(LeafReaderContext context) throws IOException {
+      if (schemaField != null && !schemaField.stored() && 
schemaField.hasDocValues()) {
+
+        FieldInfo fieldInfo = 
context.reader().getFieldInfos().fieldInfo(field);
+        DocValuesType docValuesType = fieldInfo != null ? 
fieldInfo.getDocValuesType() : DocValuesType.NONE;
+
+        if (DocValuesType.NUMERIC.equals(docValuesType)) {
+          return new NumericDocValuesFieldValueFeatureScorer(this, context,
+                  DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), 
schemaField.getType().getNumberType());
+        } else if (DocValuesType.SORTED.equals(docValuesType)) {
+          return new SortedDocValuesFieldValueFeatureScorer(this, context,
+                  DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS));
+          // If type is NONE, this segment has no docs with this field. That's 
not a problem, because we won't call score() anyway

Review comment:
       I think that's a great change, it really improves the readability!

##########
File path: 
solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
##########
@@ -70,86 +78,92 @@ public void after() throws Exception {
 
   @Test
   public void testRanking() throws Exception {
-
-    final SolrQuery query = new SolrQuery();
-    query.setQuery("title:w1");
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-
-    // Normal term match
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
-
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
-
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
-
-    query.setQuery("*:*");
-    query.remove("rows");
-    query.add("rows", "8");
-    query.remove("rq");
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=8}");
-
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+    for (String field : FIELD_NAMES) {
+
+      final SolrQuery query = new SolrQuery();
+      query.setQuery("title:w1");
+      query.add("fl", "*, score");
+      query.add("rows", "4");
+
+      // Normal term match
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
+
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
+
+      query.setQuery("*:*");
+      query.remove("rows");
+      query.add("rows", "8");
+      query.remove("rq");
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}");
+
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+    }
   }
 
 
   @Test
   public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws 
Exception {
-    SolrQuery query = new SolrQuery();
-    query.setQuery("id:42");
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
-    query = new SolrQuery();
-    query.setQuery("id:42");
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
-    query.add("fl", "[fv]");
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
-    assertJQ("/query" + query.toQueryString(),
-            
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
-
+    for (String field : FIELD_NAMES) {
+      SolrQuery query = new SolrQuery();
+      query.setQuery("id:42");
+      query.add("fl", "*, score");
+      query.add("rows", "4");
+
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + query.toQueryString(), 
"/response/docs/[0]/id=='42'");
+      query = new SolrQuery();
+      query.setQuery("id:42");
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+      query.add("fl", "[fv]");
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + query.toQueryString(),
+              
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");

Review comment:
       I think the problem is `final private boolean extractAllFeatures;` in 
`LTRScoringQuery`. It is set to true and therefore all previous features are 
added to the response instead of just the model features.
   
   This is the logic (line 210 in LTRScoringQuery):
   ```java
   if (this.extractAllFeatures) {
       features = allFeatures;
   } else{
       features =  modelFeatures;
   }
   ```
   
   I didn't find a way to set it to false with a query param.
   So we would have two options:
   - use this constructor `public LTRScoringQuery(LTRScoringModel 
ltrScoringModel, boolean extractAllFeatures)`
   - adjust the xpath to something like `[ends-with(., '<fieldname>=0.0')]`
   
   I am trying to adjust the xpath, because I would prefer that solution, but I 
was not able to find the right pattern so far.

##########
File path: 
solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
##########
@@ -70,86 +78,92 @@ public void after() throws Exception {
 
   @Test
   public void testRanking() throws Exception {
-
-    final SolrQuery query = new SolrQuery();
-    query.setQuery("title:w1");
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-
-    // Normal term match
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
-
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
-
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
-
-    query.setQuery("*:*");
-    query.remove("rows");
-    query.add("rows", "8");
-    query.remove("rq");
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=8}");
-
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+    for (String field : FIELD_NAMES) {
+
+      final SolrQuery query = new SolrQuery();
+      query.setQuery("title:w1");
+      query.add("fl", "*, score");
+      query.add("rows", "4");
+
+      // Normal term match
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
+
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
+
+      query.setQuery("*:*");
+      query.remove("rows");
+      query.add("rows", "8");
+      query.remove("rq");
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}");
+
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+    }
   }
 
 
   @Test
   public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws 
Exception {
-    SolrQuery query = new SolrQuery();
-    query.setQuery("id:42");
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
-    query = new SolrQuery();
-    query.setQuery("id:42");
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
-    query.add("fl", "[fv]");
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
-    assertJQ("/query" + query.toQueryString(),
-            
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
-
+    for (String field : FIELD_NAMES) {
+      SolrQuery query = new SolrQuery();
+      query.setQuery("id:42");
+      query.add("fl", "*, score");
+      query.add("rows", "4");
+
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + query.toQueryString(), 
"/response/docs/[0]/id=='42'");
+      query = new SolrQuery();
+      query.setQuery("id:42");
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+      query.add("fl", "[fv]");
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + query.toQueryString(),
+              
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");

Review comment:
       I think the problem is `final private boolean extractAllFeatures;` in 
`LTRScoringQuery`. It is set to true and therefore all previous features are 
added to the response instead of just the model features.
   
   This is the logic (line 210 in LTRScoringQuery):
   ```java
   if (this.extractAllFeatures) {
       features = allFeatures;
   } else{
       features =  modelFeatures;
   }
   ```
   
   I didn't find a way to set it to false with a query param.
   So we would have two options:
   - use this constructor `public LTRScoringQuery(LTRScoringModel 
ltrScoringModel, boolean extractAllFeatures)`
   - adjust the xpath to something like `[ends-with(., '<fieldname>=0.0')]`
   
   I am trying to adjust the xpath, because I would prefer that solution, but I 
was not able to find the right pattern so far.
   
   **Edit:**
   Ok, I took a break and realized my mistake (the break seemed to be necessary 
*smh*).
   I took another look at the jsonpath-adjustment. A pattern like this 
`"['response']['docs'][0][?(@['[fv]']=~ /.* <fieldName> =0.0.*/i )]"` should 
work but it would require an additional dependency for `jayway.jsonpath` and we 
would have to manually call `JsonPath.read(...)` and assert that the result is 
not null.
   
   I am not sure, whether that's nice, but maybe you want to build up upon that 
/ can generate an idea from here?

##########
File path: 
solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
##########
@@ -56,11 +62,13 @@ public void before() throws Exception {
 
     assertU(commit());
 
-    loadFeature("popularity", FieldValueFeature.class.getName(),
-            "{\"field\":\"popularity\"}");
+    for (String field : FIELD_NAMES) {
+      loadFeature(field, FieldValueFeature.class.getName(),
+              "{\"field\":\""+field+"\"}");
 
-    loadModel("popularity-model", LinearModel.class.getName(),
-            new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}");
+      loadModel(field + "-model", LinearModel.class.getName(),
+              new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}");
+    }

Review comment:
       Thanks for continuing the investigation!
   I think the example you posted here should work, although I am still a bit 
irritated regarding the `extractAllFeatures = true`.
   
   As user I would expect it to always return all the features that were 
created, no matter if that happened before or after the model. 
   But the code makes it explicit, that these information should not be 
modified (`allFeatures` is an unmodifiableList).
   I think that a nicely placed docString could avoid some confusion.

##########
File path: 
solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
##########
@@ -106,18 +106,29 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
 
     /**
      * Return a FeatureScorer that uses docValues or storedFields if no 
docValues are present
+     *
      * @param context the segment this FeatureScorer is working with
      * @return FeatureScorer for the current segment and field
      * @throws IOException as defined by abstract class Feature
      */
     @Override
     public FeatureScorer scorer(LeafReaderContext context) throws IOException {
       if (schemaField != null && !schemaField.stored() && 
schemaField.hasDocValues()) {
-        return new DocValuesFieldValueFeatureScorer(this, context,
-            DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), 
schemaField.getType());
+
+        FieldInfo fieldInfo = 
context.reader().getFieldInfos().fieldInfo(field);
+        DocValuesType docValuesType = fieldInfo != null ? 
fieldInfo.getDocValuesType() : DocValuesType.NONE;
+
+        if (DocValuesType.NUMERIC.equals(docValuesType) || 
DocValuesType.SORTED.equals(docValuesType)) {
+          return new DocValuesFieldValueFeatureScorer(this, context,
+                  DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), 
schemaField.getType(), docValuesType);
+          // If type is NONE, this segment has no docs with this field. That's 
not a problem, because we won't call score() anyway

Review comment:
       Great observation! I completely overlooked this.
   Your reasoning makes complete sense to me and some debugging verified the 
behavior you described.
   
   Even if it may be an edge case, I think it makes sense to make it explicit.
   In terms of performance you are also right. Trying to read the stored fields 
before returning the default value surely is more costly than just returning 
the value.
   
   I started to create a `DefaultValueFieldValueFeatureScorer` that always 
returns the default value, but until now I am struggling to find a way to test 
this.
   
   I hope your bike is well now! :)




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to