Author: mkataria Date: Wed Apr 15 10:40:09 2020 New Revision: 1876544 URL: http://svn.apache.org/viewvc?rev=1876544&view=rev Log: OAK-8998: fast statistics cost estimation for oak-search-elastic (patch by Fabrizio)
Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchTestUtils.java (with props) jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatisticsTest.java (with props) jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/mockito-extensions/ jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker (with props) Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatistics.java jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java?rev=1876544&r1=1876543&r2=1876544&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexNode.java Wed Apr 15 10:40:09 2020 @@ -60,6 +60,6 @@ class ElasticsearchIndexNode implements @Override public @Nullable IndexStatistics getIndexStatistics() { - return new ElasticsearchIndexStatistics(elasticsearchConnection); + return new ElasticsearchIndexStatistics(elasticsearchConnection, indexDefinition); } } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatistics.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatistics.java?rev=1876544&r1=1876543&r2=1876544&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatistics.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/main/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatistics.java Wed Apr 15 10:40:09 2020 @@ -16,46 +16,155 @@ */ package org.apache.jackrabbit.oak.plugins.index.elasticsearch.query; +import com.google.common.base.Ticker; +import com.google.common.cache.CacheBuilder; +import com.google.common.cache.CacheLoader; +import com.google.common.cache.LoadingCache; +import com.google.common.util.concurrent.ListenableFuture; +import com.google.common.util.concurrent.ListenableFutureTask; import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchConnection; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; import org.apache.jackrabbit.oak.plugins.index.search.IndexStatistics; import org.elasticsearch.client.RequestOptions; import org.elasticsearch.client.core.CountRequest; import org.elasticsearch.client.core.CountResponse; import org.elasticsearch.index.query.QueryBuilders; import org.jetbrains.annotations.NotNull; +import org.jetbrains.annotations.Nullable; +import org.jetbrains.annotations.TestOnly; import java.io.IOException; - +import java.util.Objects; +import java.util.concurrent.Executors; +import java.util.concurrent.TimeUnit; + +/** + * Cache-based {@code IndexStatistics} implementation providing statistics for Elasticsearch reducing + * network operations. + * <p> + * By default, the cache can contain a max of 10000 entries, statistic values expire after 10 minutes but are refreshed + * in background when accessed after 1 minute. These values can be overwritten with the following system properties: + * + * <ul> + * <li>{@code oak.elastic.statsMaxSize}</li> + * <li>{@code oak.elastic.statsExpireMin}</li> + * <li>{@code oak.elastic.statsRefreshMin}</li> + * </ul> + */ class ElasticsearchIndexStatistics implements IndexStatistics { + + private static final Long MAX_SIZE = Long.getLong("oak.elastic.statsMaxSize", 10000); + private static final Long EXPIRE_MIN = Long.getLong("oak.elastic.statsExpireMin", 10); + private static final Long REFRESH_MIN = Long.getLong("oak.elastic.statsRefreshMin", 1); + + private static final LoadingCache<CountRequestDescriptor, Integer> DEFAULT_STATS_CACHE = + setupCache(MAX_SIZE, EXPIRE_MIN, REFRESH_MIN, null); + private final ElasticsearchConnection elasticsearchConnection; + private final ElasticsearchIndexDefinition indexDefinition; + private final LoadingCache<CountRequestDescriptor, Integer> statsCache; - ElasticsearchIndexStatistics(@NotNull ElasticsearchConnection elasticsearchConnection) { + ElasticsearchIndexStatistics(@NotNull ElasticsearchConnection elasticsearchConnection, + @NotNull ElasticsearchIndexDefinition indexDefinition) { + this(elasticsearchConnection, indexDefinition, DEFAULT_STATS_CACHE); + } + + @TestOnly + ElasticsearchIndexStatistics(@NotNull ElasticsearchConnection elasticsearchConnection, + @NotNull ElasticsearchIndexDefinition indexDefinition, + @NotNull LoadingCache<CountRequestDescriptor, Integer> statsCache) { this.elasticsearchConnection = elasticsearchConnection; + this.indexDefinition = indexDefinition; + this.statsCache = statsCache; } + /** + * Returns the approximate number of documents for the remote index bound to the {@code ElasticsearchIndexDefinition}. + */ @Override public int numDocs() { - CountRequest countRequest = new CountRequest(); - countRequest.query(QueryBuilders.matchAllQuery()); - try { - CountResponse count = elasticsearchConnection.getClient().count(countRequest, RequestOptions.DEFAULT); - return (int) count.getCount(); - } catch (IOException e) { - // ignore failure - return 100000; - } + return statsCache.getUnchecked( + new CountRequestDescriptor(elasticsearchConnection, indexDefinition.getRemoteIndexName(), null) + ); } + /** + * Returns the approximate number of documents for the {@code field} in the remote index bound to the + * {@code ElasticsearchIndexDefinition}. + */ @Override - public int getDocCountFor(String key) { - CountRequest countRequest = new CountRequest(); - countRequest.query(QueryBuilders.existsQuery(key)); - try { - CountResponse count = elasticsearchConnection.getClient().count(countRequest, RequestOptions.DEFAULT); - return (int) count.getCount(); - } catch (IOException e) { - // ignore failure - return 1000; + public int getDocCountFor(String field) { + return statsCache.getUnchecked( + new CountRequestDescriptor(elasticsearchConnection, indexDefinition.getRemoteIndexName(), field) + ); + } + + static LoadingCache<CountRequestDescriptor, Integer> setupCache(long maxSize, long expireMin, long refreshMin, + @Nullable Ticker ticker) { + CacheBuilder<Object, Object> cacheBuilder = CacheBuilder.newBuilder() + .maximumSize(maxSize) + .expireAfterWrite(expireMin, TimeUnit.MINUTES) + // https://github.com/google/guava/wiki/CachesExplained#refresh + .refreshAfterWrite(refreshMin, TimeUnit.MINUTES); + if (ticker != null) { + cacheBuilder.ticker(ticker); + } + return cacheBuilder + .build(new CacheLoader<CountRequestDescriptor, Integer>() { + @Override + public Integer load(CountRequestDescriptor countRequestDescriptor) throws IOException { + return count(countRequestDescriptor); + } + + @Override + public ListenableFuture<Integer> reload(CountRequestDescriptor crd, Integer oldValue) { + ListenableFutureTask<Integer> task = ListenableFutureTask.create(() -> count(crd)); + Executors.newSingleThreadExecutor().execute(task); + return task; + } + }); + } + + private static int count(CountRequestDescriptor crd) throws IOException { + CountRequest countRequest = new CountRequest(crd.index); + if (crd.field != null) { + countRequest.query(QueryBuilders.existsQuery(crd.field)); + } else { + countRequest.query(QueryBuilders.matchAllQuery()); + } + + CountResponse response = crd.connection.getClient().count(countRequest, RequestOptions.DEFAULT); + return (int) response.getCount(); + } + + static class CountRequestDescriptor { + + @NotNull + final ElasticsearchConnection connection; + @NotNull + final String index; + @Nullable + final String field; + + public CountRequestDescriptor(@NotNull ElasticsearchConnection connection, + @NotNull String index, @Nullable String field) { + this.connection = connection; + this.index = index; + this.field = field; + } + + @Override + public boolean equals(Object o) { + if (this == o) return true; + if (o == null || getClass() != o.getClass()) return false; + CountRequestDescriptor that = (CountRequestDescriptor) o; + return index.equals(that.index) && + Objects.equals(field, that.field); + } + + @Override + public int hashCode() { + return Objects.hash(index, field); } } } Modified: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java?rev=1876544&r1=1876543&r2=1876544&view=diff ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java (original) +++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchPropertyIndexTest.java Wed Apr 15 10:40:09 2020 @@ -215,34 +215,6 @@ public class ElasticsearchPropertyIndexT } private static void assertEventually(Runnable r) { - assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT * 3); - } - - private static void assertEventually(Runnable r, long timeoutMillis) { - final long start = System.currentTimeMillis(); - long lastAttempt = 0; - int attempts = 0; - - while (true) { - try { - attempts++; - lastAttempt = System.currentTimeMillis(); - r.run(); - return; - } catch (Throwable e) { - long elapsedTime = lastAttempt - start; - if (elapsedTime >= timeoutMillis) { - String msg = String.format("Condition not satisfied after %1.2f seconds and %d attempts", - elapsedTime / 1000d, attempts); - throw new AssertionError(msg, e); - } - try { - Thread.sleep(100); - } catch (InterruptedException ex) { - ex.printStackTrace(); - } - - } - } + ElasticsearchTestUtils.assertEventually(r, BULK_FLUSH_INTERVAL_MS_DEFAULT * 3); } } Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchTestUtils.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchTestUtils.java?rev=1876544&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchTestUtils.java (added) +++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchTestUtils.java Wed Apr 15 10:40:09 2020 @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.elasticsearch; + +public final class ElasticsearchTestUtils { + + public static void assertEventually(Runnable r, long timeoutMillis) { + final long start = System.currentTimeMillis(); + long lastAttempt = 0; + int attempts = 0; + + while (true) { + try { + attempts++; + lastAttempt = System.currentTimeMillis(); + r.run(); + return; + } catch (Throwable e) { + long elapsedTime = lastAttempt - start; + if (elapsedTime >= timeoutMillis) { + String msg = String.format("Condition not satisfied after %1.2f seconds and %d attempts", + elapsedTime / 1000d, attempts); + throw new AssertionError(msg, e); + } + try { + Thread.sleep(100); + } catch (InterruptedException ex) { + ex.printStackTrace(); + } + + } + } + } +} Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/ElasticsearchTestUtils.java ------------------------------------------------------------------------------ svn:eol-style = native Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatisticsTest.java URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatisticsTest.java?rev=1876544&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatisticsTest.java (added) +++ jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatisticsTest.java Wed Apr 15 10:40:09 2020 @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.plugins.index.elasticsearch.query; + +import com.google.common.base.Ticker; +import com.google.common.cache.LoadingCache; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchConnection; +import org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchIndexDefinition; +import org.elasticsearch.client.RequestOptions; +import org.elasticsearch.client.RestHighLevelClient; +import org.elasticsearch.client.core.CountRequest; +import org.elasticsearch.client.core.CountResponse; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.time.Duration; + +import static org.apache.jackrabbit.oak.plugins.index.elasticsearch.ElasticsearchTestUtils.assertEventually; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; +import static org.mockito.AdditionalAnswers.answersWithDelay; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.mock; +import static org.mockito.Mockito.times; +import static org.mockito.Mockito.verify; +import static org.mockito.Mockito.verifyNoMoreInteractions; +import static org.mockito.Mockito.when; + +public class ElasticsearchIndexStatisticsTest { + + @Mock + private ElasticsearchConnection elasticsearchConnectionMock; + + @Mock + private ElasticsearchIndexDefinition indexDefinitionMock; + + @Mock + private RestHighLevelClient elasticClientMock; + + @Before + public void setUp() { + MockitoAnnotations.initMocks(this); + when(indexDefinitionMock.getRemoteIndexName()).thenReturn("test-index"); + when(elasticsearchConnectionMock.getClient()).thenReturn(elasticClientMock); + } + + @Test + public void defaultIndexStatistics() { + ElasticsearchIndexStatistics indexStatistics = + new ElasticsearchIndexStatistics(elasticsearchConnectionMock, indexDefinitionMock); + assertNotNull(indexStatistics); + } + + @Test + public void cachedStatistics() throws Exception { + MutableTicker ticker = new MutableTicker(); + LoadingCache<ElasticsearchIndexStatistics.CountRequestDescriptor, Integer> cache = + ElasticsearchIndexStatistics.setupCache(100, 10, 1, ticker); + ElasticsearchIndexStatistics indexStatistics = + new ElasticsearchIndexStatistics(elasticsearchConnectionMock, indexDefinitionMock, cache); + + CountResponse countResponse = mock(CountResponse.class); + when(countResponse.getCount()).thenReturn(100L); + + // simulate some delay when invoking elastic + when(elasticClientMock.count(any(CountRequest.class), any(RequestOptions.class))) + .then(answersWithDelay(250, i -> countResponse)); + + // cache miss, read data from elastic + assertEquals(100, indexStatistics.numDocs()); + verify(elasticClientMock).count(any(CountRequest.class), any(RequestOptions.class)); + + // index count changes in elastic + when(countResponse.getCount()).thenReturn(1000L); + + // cache hit, old value returned + assertEquals(100, indexStatistics.numDocs()); + verifyNoMoreInteractions(elasticClientMock); + + // move cache time ahead of 2 minutes, cache reload time expired + ticker.tick(Duration.ofMinutes(2)); + // old value is returned, read fresh data from elastic in background + assertEquals(100, indexStatistics.numDocs()); + + assertEventually(() -> { + try { + verify(elasticClientMock, times(2)).count(any(CountRequest.class), any(RequestOptions.class)); + } catch (IOException e) { + fail(e.getMessage()); + } + // cache hit, latest value returned + assertEquals(1000, indexStatistics.numDocs()); + }, 500); + verifyNoMoreInteractions(elasticClientMock); + + // index count changes in elastic + when(countResponse.getCount()).thenReturn(5000L); + + // move cache time ahead of 15 minutes, cache value expired + ticker.tick(Duration.ofMinutes(15)); + + // cache miss, read data from elastic + assertEquals(5000, indexStatistics.numDocs()); + verify(elasticClientMock, times(3)).count(any(CountRequest.class), any(RequestOptions.class)); + } + + private static class MutableTicker extends Ticker { + + private long nanoOffset = 0; + + @Override + public long read() { + return systemTicker().read() + nanoOffset; + } + + public void tick(Duration duration) { + nanoOffset = duration.toNanos(); + } + } + +} Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/java/org/apache/jackrabbit/oak/plugins/index/elasticsearch/query/ElasticsearchIndexStatisticsTest.java ------------------------------------------------------------------------------ svn:eol-style = native Added: jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker?rev=1876544&view=auto ============================================================================== --- jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker (added) +++ jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker Wed Apr 15 10:40:09 2020 @@ -0,0 +1,15 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +mock-maker-inline \ No newline at end of file Propchange: jackrabbit/oak/trunk/oak-search-elastic/src/test/resources/mockito-extensions/org.mockito.plugins.MockMaker ------------------------------------------------------------------------------ svn:eol-style = native