cpoerschke commented on a change in pull request #123:
URL: https://github.com/apache/solr/pull/123#discussion_r633135019



##########
File path: 
solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
##########
@@ -56,11 +62,13 @@ public void before() throws Exception {
 
     assertU(commit());
 
-    loadFeature("popularity", FieldValueFeature.class.getName(),
-            "{\"field\":\"popularity\"}");
+    for (String field : FIELD_NAMES) {
+      loadFeature(field, FieldValueFeature.class.getName(),
+              "{\"field\":\""+field+"\"}");
 
-    loadModel("popularity-model", LinearModel.class.getName(),
-            new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}");
+      loadModel(field + "-model", LinearModel.class.getName(),
+              new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}");
+    }

Review comment:
       2/2 ... I think the explanation for the mystery is here i.e. the 
`LinearModel` constructor signature and the interleaving of feature and model 
loading results in the models having different definitions of what 
`allFeatures` means. One solution then would be for the test expectations to 
account for the multiple meanings of `allFeatures` or (my preference probably) 
for there to be only one `allFeatures` meaning e.g. via the separation of 
feature and model loading. Or perhaps you can think of a third solution even?
   
   ```
   for (String field : FIELD_NAMES) {
     loadFeature(field, ...);
   }
   for (String field : FIELD_NAMES) {
     loadModel(field + "-model", ...);
   }
   ```

##########
File path: 
solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
##########
@@ -70,86 +78,92 @@ public void after() throws Exception {
 
   @Test
   public void testRanking() throws Exception {
-
-    final SolrQuery query = new SolrQuery();
-    query.setQuery("title:w1");
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-
-    // Normal term match
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
-
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
-
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
-
-    query.setQuery("*:*");
-    query.remove("rows");
-    query.add("rows", "8");
-    query.remove("rq");
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=8}");
-
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+    for (String field : FIELD_NAMES) {
+
+      final SolrQuery query = new SolrQuery();
+      query.setQuery("title:w1");
+      query.add("fl", "*, score");
+      query.add("rows", "4");
+
+      // Normal term match
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'");
+
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==4");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'");
+
+      query.setQuery("*:*");
+      query.remove("rows");
+      query.add("rows", "8");
+      query.remove("rq");
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}");
+
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'");
+      assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'");
+    }
   }
 
 
   @Test
   public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws 
Exception {
-    SolrQuery query = new SolrQuery();
-    query.setQuery("id:42");
-    query.add("fl", "*, score");
-    query.add("rows", "4");
-
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
-    assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'");
-    query = new SolrQuery();
-    query.setQuery("id:42");
-    query.add("rq", "{!ltr model=popularity-model reRankDocs=4}");
-    query.add("fl", "[fv]");
-    assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
-    assertJQ("/query" + query.toQueryString(),
-            
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");
-
+    for (String field : FIELD_NAMES) {
+      SolrQuery query = new SolrQuery();
+      query.setQuery("id:42");
+      query.add("fl", "*, score");
+      query.add("rows", "4");
+
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + query.toQueryString(), 
"/response/docs/[0]/id=='42'");
+      query = new SolrQuery();
+      query.setQuery("id:42");
+      query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}");
+      query.add("fl", "[fv]");
+      assertJQ("/query" + query.toQueryString(), "/response/numFound/==1");
+      assertJQ("/query" + query.toQueryString(),
+              
"/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}");

Review comment:
       1/2 Thanks for investigating in detail here! I agree the explanation is 
with the `extractAllFeatures` boolean i.e. the response contains the values for 
all the features in the feature store. The puzzling thing then though is why 
the first run of the loop succeeds and only subsequent runs fail. Since all 
loop runs use the same feature store and surely each loop run should return the 
same feature values? ...

##########
File path: 
solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java
##########
@@ -106,18 +106,29 @@ public FieldValueFeatureWeight(IndexSearcher searcher,
 
     /**
      * Return a FeatureScorer that uses docValues or storedFields if no 
docValues are present
+     *
      * @param context the segment this FeatureScorer is working with
      * @return FeatureScorer for the current segment and field
      * @throws IOException as defined by abstract class Feature
      */
     @Override
     public FeatureScorer scorer(LeafReaderContext context) throws IOException {
       if (schemaField != null && !schemaField.stored() && 
schemaField.hasDocValues()) {
-        return new DocValuesFieldValueFeatureScorer(this, context,
-            DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), 
schemaField.getType());
+
+        FieldInfo fieldInfo = 
context.reader().getFieldInfos().fieldInfo(field);
+        DocValuesType docValuesType = fieldInfo != null ? 
fieldInfo.getDocValuesType() : DocValuesType.NONE;
+
+        if (DocValuesType.NUMERIC.equals(docValuesType) || 
DocValuesType.SORTED.equals(docValuesType)) {
+          return new DocValuesFieldValueFeatureScorer(this, context,
+                  DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), 
schemaField.getType(), docValuesType);
+          // If type is NONE, this segment has no docs with this field. That's 
not a problem, because we won't call score() anyway

Review comment:
       > ... to find a way to test this. ...
   
   Thinking conceptually (without actually trying it) one way could be like 
this ...
   * imagine `TestFieldValueFeature` had a static inner class 
`FooBarFieldValueFeature extends FieldValueFeature` and for some of its tests 
it uses that instead of the 'real' `FieldValueFeature` class
   * imagine `FooBarFieldValueFeature` can observe whatever needs to be 
observed and capture the observations in class level variables
   * if the class level variables are public or package scope the test has 
access to them
   
   Well, that's the theory but in practice what we wish to observe and test is 
not directly in `FieldValueFeature` but inside the `FieldValueFeatureWeight` 
created, but still it could be possible e.g. something like
   
   ```
   class FooBarFieldValueFeature extends FieldValueFeature {
   
     static AtomicBoolean seenFieldValueFeatureScorer = false; // could also be 
counter instead of boolean
   
     @Override
     public FeatureWeight createWeight(...) throws IOException {
       return new FooBarFieldValueFeatureWeight(...);
     }
   
     public class FooBarFieldValueFeatureWeight extends FieldValueFeatureWeight 
{
       @Override
       public FeatureScorer scorer(LeafReaderContext context) throws 
IOException {
         FeatureScorer featureScorer = super.scorer();
         if (featureScorer instanceof FieldValueFeatureScorer) {
           seenFieldValueFeatureScorer = true;
         }
         return featureScorer;
       }
     }
   }
   ```
   
   structurally and then in the test 
`assertFalse(FooBarFieldValueFeature.seenFieldValueFeatureScorer);`
   
   Does that kind of convey the idea? Essentially the idea is to extend the 
"real" class with a "test" class, for the test class to delegate to the real 
class and for the test class to be "shaped" to suit the test's purposes.

##########
File path: 
solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java
##########
@@ -56,11 +62,13 @@ public void before() throws Exception {
 
     assertU(commit());
 
-    loadFeature("popularity", FieldValueFeature.class.getName(),
-            "{\"field\":\"popularity\"}");
+    for (String field : FIELD_NAMES) {
+      loadFeature(field, FieldValueFeature.class.getName(),
+              "{\"field\":\""+field+"\"}");
 
-    loadModel("popularity-model", LinearModel.class.getName(),
-            new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}");
+      loadModel(field + "-model", LinearModel.class.getName(),
+              new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}");
+    }

Review comment:
       Yes, it's a somewhat quirky behaviour. Operationally it could also cause 
mysteries e.g. when new features are added to an existing store and the 
(reasonable) expectation is that they will start producing feature values right 
away when actually that only happens when the existing model(s) are next 
re-instantiated. +1 to try and reduce such confusion via documentation somehow.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org



---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org
For additional commands, e-mail: issues-h...@solr.apache.org

Reply via email to