tomglk commented on a change in pull request #166: URL: https://github.com/apache/solr/pull/166#discussion_r650366702
########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.FieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + + public void setField(String field) { + this.field = field; + } + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @Override + public LinkedHashMap<String,Object> paramsToMap() { + final LinkedHashMap<String,Object> params = defaultParamsToMap(); + params.put("field", field); + return params; + } + + @Override + protected void validate() throws FeatureException { + if (field == null || field.isEmpty()) { + throw new FeatureException(getClass().getSimpleName()+ + ": field must be provided"); + } + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SchemaField schemaField; + private final SolrDocumentFetcher docFetcher; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + if (searcher instanceof SolrIndexSearcher) { + schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); + } else { + schemaField = null; + } + this.docFetcher = request.getSearcher().getDocFetcher(); + } + + /** + * Return a FeatureScorer that work with stored fields and makes use of the cache if the configured field is stored + * and has no docValues. + * Otherwise, delegate the work to the FieldValueFeature. + * + * @param context the segment this FeatureScorer is working with + * @return FeatureScorer for the current segment and field + * @throws IOException as defined by abstract class Feature + */ + @Override + public FeatureScorer scorer(LeafReaderContext context) throws IOException { + if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { + return super.scorer(context); + } + return new PrefetchingFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + } + + /** + * A FeatureScorer that reads the stored value for a field + * docFetcher does not request a single field but all the prefetchFields to improve performance through caching + */ + public class PrefetchingFieldValueFeatureScorer extends FieldValueFeatureScorer { + + public PrefetchingFieldValueFeatureScorer(FeatureWeight weight, + LeafReaderContext context, DocIdSetIterator itr) { + super(weight, context, itr); + } + + @Override + public float score() throws IOException { + try { + final Document document = docFetcher.doc(context.docBase + itr.docID(), prefetchFields); + + return super.parseStoredFieldValue(document.getField(field)); + } catch (final IOException e) { + throw new FeatureException( + e.toString() + ": " + + "Unable to extract feature for " + + name, e); Review comment: Thank you for the pointers. :) After trying `paramsToMap()` and `toString()` I opted for overriding `paramsToMap()` because I think it is very useful to see the prefetch fields when performing the get request on the feature store. ( fa1fb82 ) I had to do an adjustment in `paramsToMap()`: If there are no prefetch fields then we have to return an empty set instead of null to prevent a NPE further down the line. While adding tests for `paramsToMap()` I realized, that the `doTestParamsToMap()` test did not work. Two `PrefetchingFieldValueFeatures` are not equal, if one of them is initialized with a params map that is missing the prefetch fields and the other one is initialized with am empty set for the prefetchFields. (Because their params differ and therefore one has prefetchFields = null, the other one has an empty set.) However, the prefetchFields should never be empty / null, because `featuresAsManagedResources()` is called after `preparePrefetchingFieldValueFeatures` where the prefetchFields are initialized. So I would opt to ignore this. If you disagree on ignoring that we could fix it by a) overriding `equals()` (quite a lot of code) or b) always initializing prefetchFields with an empty set. I had to add another setter for the prefetchFields because when the params are loaded from the storage ( https://github.com/apache/lucene-solr/blob/a92a05e195b775b30ca410bc0a26e8e79e7b3bfb/solr/core/src/java/org/apache/solr/rest/ManagedResourceStorage.java#L489 ) the information that the prefetchFields are a Set is lost. ########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/TestCacheInteractionOfPrefetchingFieldValueFeature.java ########## @@ -0,0 +1,530 @@ +/* * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.misc.document.LazyDocument; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.ltr.feature.FeatureException; +import org.apache.solr.ltr.feature.PrefetchingFieldValueFeature; +import org.apache.solr.ltr.model.LinearModel; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.stream.Collectors.toList; +import static org.apache.solr.ltr.feature.PrefetchingFieldValueFeature.DISABLE_PREFETCHING_FIELD_VALUE_FEATURE; + +public class TestCacheInteractionOfPrefetchingFieldValueFeature extends TestLTROnSolrCloudBase { + private final String LAZY_FIELD_LOADING_CONFIG_KEY = "solr.query.enableLazyFieldLoading"; + + private static final String FEATURE_STORE_NAME = "test"; + private static final int NUM_FEATURES = 6; + private static final String[] FIELD_NAMES = new String[]{"storedIntField", "storedLongField", + "storedFloatField", "storedDoubleField", "storedStrNumField", "storedStrBoolField"}; + private static final String[] FEATURE_NAMES = new String[]{"storedIntFieldFeature", "storedLongFieldFeature", + "storedFloatFieldFeature", "storedDoubleFieldFeature", "storedStrNumFieldFeature", "storedStrBoolFieldFeature"}; + private static final String MODEL_WEIGHTS = "{\"weights\":{\"storedIntFieldFeature\":0.1,\"storedLongFieldFeature\":0.1," + + "\"storedFloatFieldFeature\":0.1,\"storedDoubleFieldFeature\":0.1," + + "\"storedStrNumFieldFeature\":0.1,\"storedStrBoolFieldFeature\":0.1}}"; + + @Override + void setupSolrCluster(int numShards, int numReplicas) throws Exception { + // we do not want to test the scoring / ranking but the interaction with the cache + // because the scoring itself behaves just like the FieldValueFeature + // so just one shard, replica and node serve the purpose + setupSolrCluster(1, 1, 1); + } + + @Test + public void testSimpleQuery() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(false); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "false"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = solrCluster.getSolrClient().query(COLLECTION,query); + + Map<String, List<List<String>>> loadedFields = ObservingPrefetchingFieldValueFeature.loadedFields; + + assertEquals(loadedFields.size(), queryResponse.getResults().size()); + for (SolrDocument doc : queryResponse.getResults()) { + String docId = (String) doc.getFirstValue("id"); + if (docId.equals("1")) { + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + // doc with id 1 has no values set for 3 of the 6 feature fields + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES - 3) + .count()); + } else { + // all the fields were loaded at once + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES) + .count()); + } + } + assertTheResponse(queryResponse); + } + + @Test + public void testSimpleQueryLazy() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(false); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "true"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = solrCluster.getSolrClient().query(COLLECTION,query); + + Map<String, List<List<String>>> loadedFields = ObservingPrefetchingFieldValueFeature.loadedFields; + + for (SolrDocument doc : queryResponse.getResults()) { + String docId = (String) doc.getFirstValue("id"); + if (docId.equals("1")) { + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + // doc with id 1 has no values set for 3 of the 6 feature fields + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES - 3) + .count()); + } else { + // all the fields were loaded at once + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES) + .count()); + } + } + assertTheResponse(queryResponse); + } + + @Test + public void testSimpleQueryBreakPrefetching() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(true); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "false"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = + solrCluster.getSolrClient().query(COLLECTION,query); + + Map<String, List<List<String>>> loadedFields = ObservingPrefetchingFieldValueFeature.loadedFields; + + assertEquals(loadedFields.size(), queryResponse.getResults().size()); + for (SolrDocument doc : queryResponse.getResults()) { + String docId = (String) doc.getFirstValue("id"); + if (docId.equals("1")) { + // doc with id 1 has no values set for 3 of the 6 feature fields + assertEquals(NUM_FEATURES - 3, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == 1) + .count()); + } else { + // each single field used for a feature gets loaded separately + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == 1) + .count()); + } + } + assertTheResponse(queryResponse); + } + + @Test + public void testSimpleQueryLazyBreakPrefetching() throws Exception { Review comment: I extracted some more code and added explanations for the thoughts behind the different tests ( 8d0ab32 ). ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: Hi @cpoerschke , after thinking about your suggestion (in the comment below this) for quite some time, I am still not sure what to make from it. To make my thoughts transparent: * yes, the parameter and the switch add some complexity to the code * however, the default user will most likely just stumble upon the param in the exception for the PFVF * in the exception it is explained what to use the param for * the "fallback-fetch" also adds quite some complexity to `score()` because of the try-catch-chain which I find harder to read than the current status * downside of status quo: although being less complex inside `score()`, the parameter distributes its complexity through the whole feature all the way to the query * I like about your idea, that one problematic field does not break all other prefetchFields * this happens in the status quo * however, this fallback means that if one field is broken, we basically have no benefit from the PFVF anymore. If users do only look at errors, they wouldn't even realize it (Sure, you shouldn't do that, but I cannot say how users handle their logs. If an exception is thrown, they at least are quickly aware of the problem) * it bugs me that each PFVFS would try to fetch the broken field (all fields) before falling back to only getting its field. This does not feel efficient * > If we had a fetchDocument method here for this block of code **and did not provide** a disablePrefetching option then the fetchDocument method would provide a hook * I cannot follow this comment of yours. Couldn't a subclass just ignore the `disablePrefetching` when implementing an own `score()` or `fetchDocument()`? (own `fetchDocument()` method was added in 8c5071b ) **The missing piece for me is the following:** **How likely is a failing fetch?** Can it just occur sometimes when some internal problems occur (A) ? Or will it happen all the time for one field that is somehow corrupted (B)? While looking into possible tests for the fallback I tried to get to the core of the IOException. It seemed to be quite deep down (I remember some Byte-Buffers). So I would assume, that A would apply instead of B. In that case, this argument from me > if one field is broken, we basically have no benefit from the PFVF anymore would not apply anymore, because one field would not constantly be broken. These are my thoughts about this topic. I see arguments for both approaches and no showstoppers for either one, that's why I couldn't decide what to do. Do you maybe have some insights about the likelihood of a fetch? Or just general thoughts about it? Maybe I made some assumptions as base for my thoughts which aren't correct? ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); + // get the searcher directly from the request to be sure that we have a SolrIndexSearcher + this.docFetcher = request.getSearcher().getDocFetcher(); + } + + /** + * Return a FeatureScorer that works with stored fields and makes use of the cache if the configured field is stored + * and has no docValues. + * Otherwise, delegate the work to the FieldValueFeature. + * + * @param context the segment this FeatureScorer is working with + * @return FeatureScorer for the current segment and field + * @throws IOException as defined by abstract class Feature + */ + @Override + public FeatureScorer scorer(LeafReaderContext context) throws IOException { + if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { + return super.scorer(context); + } + return new PrefetchingFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + } + + /** + * A FeatureScorer that reads the stored value for a field + * docFetcher does not request a single field but all the prefetchFields to improve performance through caching + */ + public class PrefetchingFieldValueFeatureScorer extends FieldValueFeatureScorer { + + public PrefetchingFieldValueFeatureScorer(FeatureWeight weight, + LeafReaderContext context, DocIdSetIterator itr) { + super(weight, context, itr); + } + + @Override + public float score() throws IOException { + try { + final Document document; + if(disablePrefetching) { + document = docFetcher.doc(context.docBase + itr.docID(), getFieldAsSet()); + } else { + document = docFetcher.doc(context.docBase + itr.docID(), prefetchFields); + } Review comment: That is a very good suggestion, thank you! ( 8c5071b ) Enabling other people to customize their Solr (and hopefully contribute their ideas) is really something to keep an eye out for. :) I am impressed by the number of ideas you already have for possible extensions of the class. First thoughts about that: While the disabling on a field name level is possible in subclasses of the PFVF, the disabling based on feature name seems to be a bigger task because we have no access to the names of other features yet. ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: I just answered your comment below about extracting `fetchDocument()` and I think that I did not understand your comment regarding the `disablePrefetchign` correctly until now. Referencing this one: > If we had a fetchDocument method here for this block of code and did not provide a disablePrefetching option then the fetchDocument method would provide a hook Your intention is something like this ```java public static final String DISABLE_PREFETCHING_FIELDS = "disablePrefetchingFieldValueFeatureFields"; private final List<String> disablePrefetching; disablePrefetching = (List<String>) request.getParams().get(DISABLE_PREFETCHING_FIELDS, Collections.emptyList()); ``` And that's why you want to remove the parameter from the PFVF because it does not have to be a boolean is all cases, right? ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: I just answered your comment below about extracting `fetchDocument()` and I think that I did not understand your comment regarding the `disablePrefetchign` correctly until now. Referencing this one: > If we had a fetchDocument method here for this block of code and did not provide a disablePrefetching option then the fetchDocument method would provide a hook Your intention is something like this (in a subclass of PFVF) ```java public static final String DISABLE_PREFETCHING_FIELDS = "disablePrefetchingFieldValueFeatureFields"; private final List<String> disablePrefetching; disablePrefetching = (List<String>) request.getParams().get(DISABLE_PREFETCHING_FIELDS, Collections.emptyList()); ``` And that's why you want to remove the parameter from the PFVF because it does not have to be a boolean is all cases, right? ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: I just answered your comment below about extracting `fetchDocument()` and I think that I did not understand your comment regarding the `disablePrefetchign` correctly until now. Referencing this one: > If we had a fetchDocument method here for this block of code and did not provide a disablePrefetching option then the fetchDocument method would provide a hook Your intention is something like this (in a subclass of PFVF) ```java public static final String DISABLE_PREFETCHING_FIELDS = "disablePrefetchingFieldValueFeatureFields"; private final List<String> disablePrefetching; disablePrefetching = (List<String>) request.getParams().get(DISABLE_PREFETCHING_FIELDS, Collections.emptyList()); ``` And that's why you want to remove the parameter from the PFVF because it does not have to be a boolean in all cases, right? ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: Hi @cpoerschke , please do not worry about the length of your responses! I am extremely happy and thankful, that you take the time to share your thoughts and ideas in so much detail and with examples. :) Besides being useful to understand you, it also sparks ideas in my brain and points out aspects that are not on my radar. Because of that collaborating with you is an amazing learning experience! I stumbled upon this question of yours: > how would the document fetcher behave if one of the fields does not exist in the schema or it exists but is not stored and not docValues? or if the field exists but is optional and the document being scored has no value for the field? and added a test for it ( 4d84293 ). No matter if a field is not existing, only indexed or empty, we always return the default. The reason for this is that Lucene iterates over present fields and check which ones are needed, instead of trying to read all wanted fields (https://github.com/apache/lucene/blob/be94a667f2091b2c0fad570f9e726197d466767b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/compressing/Lucene90CompressingStoredFieldsReader.java#L653 and https://github.com/apache/lucene/blob/be94a667f2091b2c0fad570f9e726197d466767b/lucene/core/src/java/org/apache/lucene/document/DocumentStoredFieldVisitor.java#L101). ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: Reading through your commit, I am wondering why you split up the `computedPrefetchFields` and the `effectivePrefetchFields`. I do understand the difference between them, but I am not sure what to use them for apart from the output of `paramsToMap()`. Speaking of the output of paramsToMap, I remember this input of yours > if our two hypethical nodes both complained about some features but one complained about fZ, fA, fX, fB and the other about fA, fZ, fB, fX then it might be tricky to interpret that they are both complaining about the same thing I think in terms of debugging, your suggestion to use a sorted set, combined with keeping the field of the feature inside the params.prefetchFields makes the most sense. This would result in duplicated information in the output, but I imagine the user doing stuff like "highlight" => "Strg + F" or searching the logs like "prefetchFields=fA, fB, fX, fZ" which would be impossible if we always remove the `field` from the output. ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: Looking at `configuredPrefetchFields` maybe this could be an optional parameter in the features.json? Behaving like `ltr.PrefetchingFieldValueFeature`.~~exclusions~~`.inclusions` on a config level instead of as a query parameter. ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: Regarding `ltr.PrefetchingFieldValueFeature.exclusions`: > I don't know if practically additional prefetch fields could work out as costly enough to justify the operational efforts of the outlined ltr.PrefetchingFieldValueFeature.exclusions parameter use I was surprised by your point that multiple models may be using the same store. I just assumed that people would use one feature-store per model. This could really lead to a lot of unnecessary fetching, especially is there is not much overlap between the features of the models. **However**, after taking a step back and writing the test ( 4d84293 ), this `costly enough to justify` thought of you stayed in my mind. The exclusion is not necessary to handle problems with only indexed or not existing fields. Also, to put it very bluntly, I am wondering how much of user's misconfiguration we should try to handle in our code. If there are two models that barely use the same features, which really would result in a lot of unnecessary fetching to the point where performance worsens, I would expect the user to use a separate feature-store per model. Maybe his could even be a hint / recommendation in the documentation. This statement from you: > index corruption is (hopefully) a rare source of errors, and in such a scenario rethrowing the exception with the details would seem most useful. made me want to remove the `disablePrefetchingFieldValueFeature` completely. While implementing it I felt like there would be more value to it, but now it seems like this param would only be useful in a situation where something is really wrong with the index (assuming that 'useful' means that there are quite some `Unable to extract feature...`-exceptions). In that case the failing fetch of the PFVF seems to be the smallest problem besides like being unable to properly search anymore. So I am thinking about: ```java protected Document fetchDocument() throws FeatureException { try { return docFetcher.doc(context.docBase + itr.docID(), prefetchFields); } catch (final IOException e) { final String prefetchedFields = disablePrefetching ? getField() : StrUtils.join(prefetchFields, ','); throw new FeatureException(...); } } ``` What do you think about the above change? We could also provide some fail-safety with your suggested try-catch. (Good structure in the method / comments could make up for the complexity. And maybe log an error instead of a warning?) ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: About: > one thread reading and another updating seems potentially problematic Thank you for having thread-safety / concurrency in mind! This is a topic about which I still have a lot to learn and build an awareness for. To separate the different construction sites we are on, I would like to move the conversation about that to your comment at the bottom. (https://github.com/apache/solr/pull/166#discussion_r650491855) ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/LTRScoringQuery.java ########## @@ -223,6 +223,7 @@ public ModelWeight createWeight(IndexSearcher searcher, ScoreMode scoreMode, flo else{ createWeightsParallel(searcher, scoreMode.needsScores(), featureWeights, features); } + // TODO: potential prefetchFields computation site? Review comment: Hi @cpoerschke , I am glad that this PR sparks your creativity! :) This is why I find it so valuable to have sometimes long conversations. Hearing different opinions or concerns or having to explain some thoughts makes us reevaluate the current point of view which can be really inspiring! I am also having a lot of fun discovering more and more of the internal workings of Solr & Lucene. Thanks for leaving the `TODO` in the code! > less or different visibility into the overall prefetch fields set I think we could still provide the visibility by hooking into `doGet()` of the `ManagedFeatureStore` and collecting all the fields. So that shouldn't even be a problem. I can clearly follow your pros and cons. My only concern is the re-computation per query. If we do the collection in the ManagedFeatureStore, we have to iterate and set the fields approx. once per shard*. If we do it after creating the weights, we have to do it once per query. (*That's what a very quick "debugging" of the procedures while executing `TestCacheInteractionOfPrefetchingFieldValueFeature.testSimpleQuery()` with different shard numbers told me.) I would like to think about the cost of the re-computation vs. the cost of handling the concurrency. ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,134 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.common.util.StrUtils; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; + +import java.io.IOException; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.PrefetchingFieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + // can be used for debugging to only fetch the field this features uses + public static final String DISABLE_PREFETCHING_FIELD_VALUE_FEATURE = "disablePrefetchingFieldValueFeature"; + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SolrDocumentFetcher docFetcher; + private final Boolean disablePrefetching; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + + disablePrefetching = request.getParams().getBool(DISABLE_PREFETCHING_FIELD_VALUE_FEATURE, false); Review comment: Looking at `configuredPrefetchFields` maybe this could be an optional parameter in the features.json? Behaving like `ltr.PrefetchingFieldValueFeature`.~~exclusions~~`.inclusions` on a config level instead of as a query parameter. Apart from such an usage I am not sure about your thoughts behind that. This code here ```java public void setPrefetchFields(Collection<String> fields) { configuredPrefetchFields = fields; // no effectivePrefetchFields update here, only computed fields are effective } ``` confuses me. You say that only the computed fields are the important ones. However the configured fields get updated but are not used anywhere. ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. + * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.FieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + + public void setField(String field) { + this.field = field; + } + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @Override + public LinkedHashMap<String,Object> paramsToMap() { + final LinkedHashMap<String,Object> params = defaultParamsToMap(); + params.put("field", field); + return params; + } + + @Override + protected void validate() throws FeatureException { + if (field == null || field.isEmpty()) { + throw new FeatureException(getClass().getSimpleName()+ + ": field must be provided"); + } + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SchemaField schemaField; + private final SolrDocumentFetcher docFetcher; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + if (searcher instanceof SolrIndexSearcher) { + schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); + } else { + schemaField = null; + } + this.docFetcher = request.getSearcher().getDocFetcher(); + } + + /** + * Return a FeatureScorer that work with stored fields and makes use of the cache if the configured field is stored + * and has no docValues. + * Otherwise, delegate the work to the FieldValueFeature. + * + * @param context the segment this FeatureScorer is working with + * @return FeatureScorer for the current segment and field + * @throws IOException as defined by abstract class Feature + */ + @Override + public FeatureScorer scorer(LeafReaderContext context) throws IOException { + if (schemaField != null && !schemaField.stored() && schemaField.hasDocValues()) { + return super.scorer(context); + } + return new PrefetchingFieldValueFeatureScorer(this, context, + DocIdSetIterator.all(DocIdSetIterator.NO_MORE_DOCS)); + } + + /** + * A FeatureScorer that reads the stored value for a field + * docFetcher does not request a single field but all the prefetchFields to improve performance through caching + */ + public class PrefetchingFieldValueFeatureScorer extends FieldValueFeatureScorer { + + public PrefetchingFieldValueFeatureScorer(FeatureWeight weight, + LeafReaderContext context, DocIdSetIterator itr) { + super(weight, context, itr); + } + + @Override + public float score() throws IOException { + try { + final Document document = docFetcher.doc(context.docBase + itr.docID(), prefetchFields); + + return super.parseStoredFieldValue(document.getField(field)); + } catch (final IOException e) { + throw new FeatureException( + e.toString() + ": " + + "Unable to extract feature for " + + name, e); Review comment: I talked about your commit in a comment below ( https://github.com/apache/solr/pull/166#discussion_r650527430 and following ), so I'm just leaving the pointer here. :) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org