cpoerschke commented on a change in pull request #123: URL: https://github.com/apache/solr/pull/123#discussion_r633135019
########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java ########## @@ -56,11 +62,13 @@ public void before() throws Exception { assertU(commit()); - loadFeature("popularity", FieldValueFeature.class.getName(), - "{\"field\":\"popularity\"}"); + for (String field : FIELD_NAMES) { + loadFeature(field, FieldValueFeature.class.getName(), + "{\"field\":\""+field+"\"}"); - loadModel("popularity-model", LinearModel.class.getName(), - new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}"); + loadModel(field + "-model", LinearModel.class.getName(), + new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}"); + } Review comment: 2/2 ... I think the explanation for the mystery is here i.e. the `LinearModel` constructor signature and the interleaving of feature and model loading results in the models having different definitions of what `allFeatures` means. One solution then would be for the test expectations to account for the multiple meanings of `allFeatures` or (my preference probably) for there to be only one `allFeatures` meaning e.g. via the separation of feature and model loading. Or perhaps you can think of a third solution even? ``` for (String field : FIELD_NAMES) { loadFeature(field, ...); } for (String field : FIELD_NAMES) { loadModel(field + "-model", ...); } ``` ########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java ########## @@ -70,86 +78,92 @@ public void after() throws Exception { @Test public void testRanking() throws Exception { - - final SolrQuery query = new SolrQuery(); - query.setQuery("title:w1"); - query.add("fl", "*, score"); - query.add("rows", "4"); - - // Normal term match - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); - - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); - - assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); - - query.setQuery("*:*"); - query.remove("rows"); - query.add("rows", "8"); - query.remove("rq"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=8}"); - - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); + for (String field : FIELD_NAMES) { + + final SolrQuery query = new SolrQuery(); + query.setQuery("title:w1"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + // Normal term match + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='1'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='7'"); + + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==4"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='1'"); + + query.setQuery("*:*"); + query.remove("rows"); + query.add("rows", "8"); + query.remove("rq"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=8}"); + + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='8'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[1]/id=='7'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[2]/id=='6'"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[3]/id=='5'"); + } } @Test public void testIfADocumentDoesntHaveAFieldDefaultValueIsReturned() throws Exception { - SolrQuery query = new SolrQuery(); - query.setQuery("id:42"); - query.add("fl", "*, score"); - query.add("rows", "4"); - - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); - query = new SolrQuery(); - query.setQuery("id:42"); - query.add("rq", "{!ltr model=popularity-model reRankDocs=4}"); - query.add("fl", "[fv]"); - assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); - assertJQ("/query" + query.toQueryString(), - "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector("popularity",Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); - + for (String field : FIELD_NAMES) { + SolrQuery query = new SolrQuery(); + query.setQuery("id:42"); + query.add("fl", "*, score"); + query.add("rows", "4"); + + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), "/response/docs/[0]/id=='42'"); + query = new SolrQuery(); + query.setQuery("id:42"); + query.add("rq", "{!ltr model="+field+"-model reRankDocs=4}"); + query.add("fl", "[fv]"); + assertJQ("/query" + query.toQueryString(), "/response/numFound/==1"); + assertJQ("/query" + query.toQueryString(), + "/response/docs/[0]/=={'[fv]':'"+FeatureLoggerTestUtils.toFeatureVector(field,Float.toString(FIELD_VALUE_FEATURE_DEFAULT_VAL))+"'}"); Review comment: 1/2 Thanks for investigating in detail here! I agree the explanation is with the `extractAllFeatures` boolean i.e. the response contains the values for all the features in the feature store. The puzzling thing then though is why the first run of the loop succeeds and only subsequent runs fail. Since all loop runs use the same feature store and surely each loop run should return the same feature values? ... ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/FieldValueFeature.java ########## @@ -106,18 +106,29 @@ public FieldValueFeatureWeight(IndexSearcher searcher, /** * Return a FeatureScorer that uses docValues or storedFields if no docValues are present + * * @param context the segment this FeatureScorer is working with * @return FeatureScorer for the current segment and field * @throws IOException as defined by abstract class Feature */ @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { - return new DocValuesFieldValueFeatureScorer(this, context, - DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType()); + + FieldInfo fieldInfo = context.reader().getFieldInfos().fieldInfo(field); + DocValuesType docValuesType = fieldInfo != null ? fieldInfo.getDocValuesType() : DocValuesType.NONE; + + if (DocValuesType.NUMERIC.equals(docValuesType) || DocValuesType.SORTED.equals(docValuesType)) { + return new DocValuesFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS), schemaField.getType(), docValuesType); + // If type is NONE, this segment has no docs with this field. That's not a problem, because we won't call score() anyway Review comment: > ... to find a way to test this. ... Thinking conceptually (without actually trying it) one way could be like this ... * imagine `TestFieldValueFeature` had a static inner class `FooBarFieldValueFeature extends FieldValueFeature` and for some of its tests it uses that instead of the 'real' `FieldValueFeature` class * imagine `FooBarFieldValueFeature` can observe whatever needs to be observed and capture the observations in class level variables * if the class level variables are public or package scope the test has access to them Well, that's the theory but in practice what we wish to observe and test is not directly in `FieldValueFeature` but inside the `FieldValueFeatureWeight` created, but still it could be possible e.g. something like ``` class FooBarFieldValueFeature extends FieldValueFeature { static AtomicBoolean seenFieldValueFeatureScorer = false; // could also be counter instead of boolean @Override public FeatureWeight createWeight(...) throws IOException { return new FooBarFieldValueFeatureWeight(...); } public class FooBarFieldValueFeatureWeight extends FieldValueFeatureWeight { @Override public FeatureScorer scorer(LeafReaderContext context) throws IOException { FeatureScorer featureScorer = super.scorer(); if (featureScorer instanceof FieldValueFeatureScorer) { seenFieldValueFeatureScorer = true; } return featureScorer; } } } ``` structurally and then in the test `assertFalse(FooBarFieldValueFeature.seenFieldValueFeatureScorer);` Does that kind of convey the idea? Essentially the idea is to extend the "real" class with a "test" class, for the test class to delegate to the real class and for the test class to be "shaped" to suit the test's purposes. ########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/feature/TestFieldValueFeature.java ########## @@ -56,11 +62,13 @@ public void before() throws Exception { assertU(commit()); - loadFeature("popularity", FieldValueFeature.class.getName(), - "{\"field\":\"popularity\"}"); + for (String field : FIELD_NAMES) { + loadFeature(field, FieldValueFeature.class.getName(), + "{\"field\":\""+field+"\"}"); - loadModel("popularity-model", LinearModel.class.getName(), - new String[] {"popularity"}, "{\"weights\":{\"popularity\":1.0}}"); + loadModel(field + "-model", LinearModel.class.getName(), + new String[] {field}, "{\"weights\":{\""+field+"\":1.0}}"); + } Review comment: Yes, it's a somewhat quirky behaviour. Operationally it could also cause mysteries e.g. when new features are added to an existing store and the (reasonable) expectation is that they will start producing feature values right away when actually that only happens when the existing model(s) are next re-instantiated. +1 to try and reduce such confusion via documentation somehow. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org