cpoerschke commented on a change in pull request #166: URL: https://github.com/apache/solr/pull/166#discussion_r648514785
########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/TestCacheInteractionOfPrefetchingFieldValueFeature.java ########## @@ -0,0 +1,530 @@ +/* * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.misc.document.LazyDocument; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.ltr.feature.FeatureException; +import org.apache.solr.ltr.feature.PrefetchingFieldValueFeature; +import org.apache.solr.ltr.model.LinearModel; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import 
java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.stream.Collectors.toList; +import static org.apache.solr.ltr.feature.PrefetchingFieldValueFeature.DISABLE_PREFETCHING_FIELD_VALUE_FEATURE; + +public class TestCacheInteractionOfPrefetchingFieldValueFeature extends TestLTROnSolrCloudBase { + private final String LAZY_FIELD_LOADING_CONFIG_KEY = "solr.query.enableLazyFieldLoading"; + + private static final String FEATURE_STORE_NAME = "test"; + private static final int NUM_FEATURES = 6; + private static final String[] FIELD_NAMES = new String[]{"storedIntField", "storedLongField", + "storedFloatField", "storedDoubleField", "storedStrNumField", "storedStrBoolField"}; + private static final String[] FEATURE_NAMES = new String[]{"storedIntFieldFeature", "storedLongFieldFeature", + "storedFloatFieldFeature", "storedDoubleFieldFeature", "storedStrNumFieldFeature", "storedStrBoolFieldFeature"}; + private static final String MODEL_WEIGHTS = "{\"weights\":{\"storedIntFieldFeature\":0.1,\"storedLongFieldFeature\":0.1," + + "\"storedFloatFieldFeature\":0.1,\"storedDoubleFieldFeature\":0.1," + + "\"storedStrNumFieldFeature\":0.1,\"storedStrBoolFieldFeature\":0.1}}"; + + @Override + void setupSolrCluster(int numShards, int numReplicas) throws Exception { + // we do not want to test the scoring / ranking but the interaction with the cache + // because the scoring itself behaves just like the FieldValueFeature + // so just one shard, replica and node serve the purpose + setupSolrCluster(1, 1, 1); + } + + @Test + public void testSimpleQuery() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(false); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "false"); + // needed to clear cache because we 
make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = solrCluster.getSolrClient().query(COLLECTION,query); + + Map<String, List<List<String>>> loadedFields = ObservingPrefetchingFieldValueFeature.loadedFields; + + assertEquals(loadedFields.size(), queryResponse.getResults().size()); + for (SolrDocument doc : queryResponse.getResults()) { + String docId = (String) doc.getFirstValue("id"); + if (docId.equals("1")) { + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + // doc with id 1 has no values set for 3 of the 6 feature fields + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES - 3) + .count()); + } else { + // all the fields were loaded at once + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES) + .count()); + } + } + assertTheResponse(queryResponse); + } + + @Test + public void testSimpleQueryLazy() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(false); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "true"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = solrCluster.getSolrClient().query(COLLECTION,query); + + Map<String, List<List<String>>> loadedFields = ObservingPrefetchingFieldValueFeature.loadedFields; + + for (SolrDocument doc : 
queryResponse.getResults()) { + String docId = (String) doc.getFirstValue("id"); + if (docId.equals("1")) { + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + // doc with id 1 has no values set for 3 of the 6 feature fields + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES - 3) + .count()); + } else { + // all the fields were loaded at once + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES) + .count()); + } + } + assertTheResponse(queryResponse); + } + + @Test + public void testSimpleQueryBreakPrefetching() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(true); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "false"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = + solrCluster.getSolrClient().query(COLLECTION,query); + + Map<String, List<List<String>>> loadedFields = ObservingPrefetchingFieldValueFeature.loadedFields; + + assertEquals(loadedFields.size(), queryResponse.getResults().size()); + for (SolrDocument doc : queryResponse.getResults()) { + String docId = (String) doc.getFirstValue("id"); + if (docId.equals("1")) { + // doc with id 1 has no values set for 3 of the 6 feature fields + assertEquals(NUM_FEATURES - 3, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == 1) + .count()); + } else { + // each single field used for a feature gets loaded separately + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == 1) + 
.count()); + } + } + assertTheResponse(queryResponse); + } + + @Test + public void testSimpleQueryLazyBreakPrefetching() throws Exception { Review comment: At a glance it's tricky to see how much overlap there is between the tests in this class here but conceptually if two tests `testOdd` and `testEven` are almost identical it could be helpful to factor out a common implementation method to reduce code duplication e.g. ``` public void testOdd() { implTestOddOrEven(true); } public void testEven() { implTestOddOrEven(false); } private void implTestOddOrEven(boolean odd) { ... } ``` ########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/TestCacheInteractionOfPrefetchingFieldValueFeature.java ########## @@ -0,0 +1,530 @@ +/* * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.solr.ltr; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.misc.document.LazyDocument; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.ltr.feature.FeatureException; +import org.apache.solr.ltr.feature.PrefetchingFieldValueFeature; +import org.apache.solr.ltr.model.LinearModel; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.stream.Collectors.toList; +import static org.apache.solr.ltr.feature.PrefetchingFieldValueFeature.DISABLE_PREFETCHING_FIELD_VALUE_FEATURE; + +public class TestCacheInteractionOfPrefetchingFieldValueFeature extends TestLTROnSolrCloudBase { + private final String LAZY_FIELD_LOADING_CONFIG_KEY = "solr.query.enableLazyFieldLoading"; + + private static final String FEATURE_STORE_NAME = "test"; + private static final int NUM_FEATURES = 6; + private static final String[] FIELD_NAMES = new String[]{"storedIntField", "storedLongField", + "storedFloatField", "storedDoubleField", "storedStrNumField", "storedStrBoolField"}; + private static final String[] FEATURE_NAMES = new String[]{"storedIntFieldFeature", 
"storedLongFieldFeature", + "storedFloatFieldFeature", "storedDoubleFieldFeature", "storedStrNumFieldFeature", "storedStrBoolFieldFeature"}; + private static final String MODEL_WEIGHTS = "{\"weights\":{\"storedIntFieldFeature\":0.1,\"storedLongFieldFeature\":0.1," + + "\"storedFloatFieldFeature\":0.1,\"storedDoubleFieldFeature\":0.1," + + "\"storedStrNumFieldFeature\":0.1,\"storedStrBoolFieldFeature\":0.1}}"; + + @Override + void setupSolrCluster(int numShards, int numReplicas) throws Exception { + // we do not want to test the scoring / ranking but the interaction with the cache + // because the scoring itself behaves just like the FieldValueFeature + // so just one shard, replica and node serve the purpose + setupSolrCluster(1, 1, 1); + } + + @Test + public void testSimpleQuery() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(false); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "false"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); Review comment: minor: Might factoring out a `composeSolrQuery` method similar to how `assertTheResponse` logic is already shared between tests help reduce code duplication? ########## File path: solr/contrib/ltr/src/java/org/apache/solr/ltr/feature/PrefetchingFieldValueFeature.java ########## @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.solr.ltr.feature; + +import com.google.common.annotations.VisibleForTesting; +import org.apache.lucene.document.Document; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.schema.SchemaField; +import org.apache.solr.search.SolrDocumentFetcher; +import org.apache.solr.search.SolrIndexSearcher; + +import java.io.IOException; +import java.util.LinkedHashMap; +import java.util.Map; +import java.util.Set; + +/** + * This feature returns the value of a field in the current document. + * The field must have stored="true" or docValues="true" properties. 
+ * Example configuration: + * <pre>{ + "name": "rawHits", + "class": "org.apache.solr.ltr.feature.FieldValueFeature", + "params": { + "field": "hits" + } +}</pre> + */ +public class PrefetchingFieldValueFeature extends FieldValueFeature { + // used to store all fields from all PrefetchingFieldValueFeatures + private Set<String> prefetchFields; + + public void setField(String field) { + this.field = field; + } + + public void setPrefetchFields(Set<String> fields) { + prefetchFields = fields; + } + + @Override + public LinkedHashMap<String,Object> paramsToMap() { + final LinkedHashMap<String,Object> params = defaultParamsToMap(); + params.put("field", field); + return params; + } + + @Override + protected void validate() throws FeatureException { + if (field == null || field.isEmpty()) { + throw new FeatureException(getClass().getSimpleName()+ + ": field must be provided"); + } + } + + public PrefetchingFieldValueFeature(String name, Map<String,Object> params) { + super(name, params); + } + + @Override + public FeatureWeight createWeight(IndexSearcher searcher, boolean needsScores, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) + throws IOException { + return new PrefetchingFieldValueFeatureWeight(searcher, request, originalQuery, efi); + } + + @VisibleForTesting + public Set<String> getPrefetchFields(){ + return prefetchFields; + } + + public class PrefetchingFieldValueFeatureWeight extends FieldValueFeatureWeight { + private final SchemaField schemaField; + private final SolrDocumentFetcher docFetcher; + + public PrefetchingFieldValueFeatureWeight(IndexSearcher searcher, + SolrQueryRequest request, Query originalQuery, Map<String,String[]> efi) { + super(searcher, request, originalQuery, efi); + if (searcher instanceof SolrIndexSearcher) { + schemaField = ((SolrIndexSearcher) searcher).getSchema().getFieldOrNull(field); + } else { + schemaField = null; + } + this.docFetcher = request.getSearcher().getDocFetcher(); Review comment: > `// 
get the searcher directly from the request to be sure that we have a SolrIndexSearcher` Yes, that's exactly the kind of comment that will avoid confusion for future code readers — thanks for adding it! ########## File path: solr/contrib/ltr/src/test/org/apache/solr/ltr/TestCacheInteractionOfPrefetchingFieldValueFeature.java ########## @@ -0,0 +1,530 @@ +/* * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.solr.ltr; + +import org.apache.lucene.document.Document; +import org.apache.lucene.document.Field; +import org.apache.lucene.document.StoredField; +import org.apache.lucene.index.LeafReaderContext; +import org.apache.lucene.misc.document.LazyDocument; +import org.apache.lucene.search.DocIdSetIterator; +import org.apache.lucene.search.IndexSearcher; +import org.apache.lucene.search.Query; +import org.apache.solr.client.solrj.SolrQuery; +import org.apache.solr.client.solrj.response.QueryResponse; +import org.apache.solr.common.SolrDocument; +import org.apache.solr.common.SolrInputDocument; +import org.apache.solr.ltr.feature.FeatureException; +import org.apache.solr.ltr.feature.PrefetchingFieldValueFeature; +import org.apache.solr.ltr.model.LinearModel; +import org.apache.solr.request.SolrQueryRequest; +import org.apache.solr.search.SolrDocumentFetcher; +import org.junit.Test; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashMap; +import java.util.List; +import java.util.Locale; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.Collectors; +import java.util.stream.IntStream; + +import static java.util.stream.Collectors.toList; +import static org.apache.solr.ltr.feature.PrefetchingFieldValueFeature.DISABLE_PREFETCHING_FIELD_VALUE_FEATURE; + +public class TestCacheInteractionOfPrefetchingFieldValueFeature extends TestLTROnSolrCloudBase { + private final String LAZY_FIELD_LOADING_CONFIG_KEY = "solr.query.enableLazyFieldLoading"; + + private static final String FEATURE_STORE_NAME = "test"; + private static final int NUM_FEATURES = 6; + private static final String[] FIELD_NAMES = new String[]{"storedIntField", "storedLongField", + "storedFloatField", "storedDoubleField", "storedStrNumField", "storedStrBoolField"}; + private static final String[] FEATURE_NAMES = new String[]{"storedIntFieldFeature", 
"storedLongFieldFeature", + "storedFloatFieldFeature", "storedDoubleFieldFeature", "storedStrNumFieldFeature", "storedStrBoolFieldFeature"}; + private static final String MODEL_WEIGHTS = "{\"weights\":{\"storedIntFieldFeature\":0.1,\"storedLongFieldFeature\":0.1," + + "\"storedFloatFieldFeature\":0.1,\"storedDoubleFieldFeature\":0.1," + + "\"storedStrNumFieldFeature\":0.1,\"storedStrBoolFieldFeature\":0.1}}"; + + @Override + void setupSolrCluster(int numShards, int numReplicas) throws Exception { + // we do not want to test the scoring / ranking but the interaction with the cache + // because the scoring itself behaves just like the FieldValueFeature + // so just one shard, replica and node serve the purpose + setupSolrCluster(1, 1, 1); + } + + @Test + public void testSimpleQuery() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(false); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "false"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); + + SolrQuery query = new SolrQuery("{!func}sub(8,field(popularity))"); + query.setRequestHandler("/query"); + query.setParam("rows", "8"); + query.setFields("id,features:[fv]"); + query.add("rq", "{!ltr model=powpularityS-model reRankDocs=8}"); + + QueryResponse queryResponse = solrCluster.getSolrClient().query(COLLECTION,query); + + Map<String, List<List<String>>> loadedFields = ObservingPrefetchingFieldValueFeature.loadedFields; + + assertEquals(loadedFields.size(), queryResponse.getResults().size()); + for (SolrDocument doc : queryResponse.getResults()) { + String docId = (String) doc.getFirstValue("id"); + if (docId.equals("1")) { + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + // doc with id 1 has no values set for 3 of the 6 feature fields + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES - 3) + .count()); + } else { + // all the 
fields were loaded at once + assertEquals(NUM_FEATURES, loadedFields.get(docId).stream() + .filter(fieldLoadedList -> fieldLoadedList.size() == NUM_FEATURES) + .count()); + } + } + assertTheResponse(queryResponse); + } + + @Test + public void testSimpleQueryLazy() throws Exception { + ObservingPrefetchingFieldValueFeature.setBreakPrefetching(false); + ObservingPrefetchingFieldValueFeature.loadedFields = new HashMap<>(); + System.setProperty(LAZY_FIELD_LOADING_CONFIG_KEY, "true"); + // needed to clear cache because we make assertions on its content + reloadCollection(COLLECTION); Review comment: I haven't yet looked in detail at how the lazy-loading flag influences the test logic but if the lazy=false and lazy=true tests are identical (or pretty similar) then it might be worth considering using randomisation e.g. like in https://github.com/apache/lucene-solr/blob/releases/lucene-solr/8.8.2/solr/core/src/test/org/apache/solr/search/CursorMarkTest.java#L54 i.e. before the tests `false` or `true` is chosen and that is then used for the tests. It means that a given run only covers "half" the possibilities but it's also half the code _and_ there would be no need for collection reloading (which could also slow down the test) but yes, if cache clearing needs to be achieved, we would still have to do that -- might adding/deleting/updating of a document have that effect indirectly or maybe there's some other way to directly clear the document cache in a test? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: issues-unsubscr...@solr.apache.org For additional commands, e-mail: issues-h...@solr.apache.org