This is an automated email from the ASF dual-hosted git repository. juanpablo pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/jspwiki.git
commit 7a7a588e72ee32ff0e5e6a2f366af60c3326f770 Author: Juan Pablo Santos Rodríguez <[email protected]> AuthorDate: Wed Mar 10 00:09:01 2021 +0100 code format & clean-up as suggested by IntelliJ --- .../wiki/search/kendra/KendraSearchProvider.java | 920 ++++++++++----------- .../search/kendra/KendraSearchProviderTest.java | 382 ++++----- 2 files changed, 641 insertions(+), 661 deletions(-) diff --git a/jspwiki-kendra-searchprovider/src/main/java/org/apache/wiki/search/kendra/KendraSearchProvider.java b/jspwiki-kendra-searchprovider/src/main/java/org/apache/wiki/search/kendra/KendraSearchProvider.java index cbf87d3..658ecf2 100644 --- a/jspwiki-kendra-searchprovider/src/main/java/org/apache/wiki/search/kendra/KendraSearchProvider.java +++ b/jspwiki-kendra-searchprovider/src/main/java/org/apache/wiki/search/kendra/KendraSearchProvider.java @@ -18,22 +18,13 @@ */ package org.apache.wiki.search.kendra; -import static java.lang.String.format; - -import java.io.IOException; -import java.io.InputStream; -import java.io.InputStreamReader; -import java.lang.reflect.Type; -import java.nio.ByteBuffer; -import java.nio.charset.StandardCharsets; -import java.util.ArrayList; -import java.util.Collection; -import java.util.Collections; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.Properties; - +import com.amazonaws.services.kendra.AWSkendra; +import com.amazonaws.services.kendra.AWSkendraClientBuilder; +import com.amazonaws.services.kendra.model.*; +import com.amazonaws.util.IOUtils; +import com.google.gson.Gson; +import com.google.gson.GsonBuilder; +import com.google.gson.reflect.TypeToken; import org.apache.commons.io.FilenameUtils; import org.apache.commons.lang3.StringUtils; import org.apache.log4j.Logger; @@ -56,35 +47,15 @@ import org.apache.wiki.pages.PageManager; import org.apache.wiki.search.SearchProvider; import org.apache.wiki.util.TextUtil; -import com.amazonaws.services.kendra.AWSkendra; -import 
com.amazonaws.services.kendra.AWSkendraClientBuilder; -import com.amazonaws.services.kendra.model.BatchDeleteDocumentRequest; -import com.amazonaws.services.kendra.model.BatchDeleteDocumentResult; -import com.amazonaws.services.kendra.model.BatchPutDocumentRequest; -import com.amazonaws.services.kendra.model.BatchPutDocumentResponseFailedDocument; -import com.amazonaws.services.kendra.model.BatchPutDocumentResult; -import com.amazonaws.services.kendra.model.ContentType; -import com.amazonaws.services.kendra.model.DataSourceSummary; -import com.amazonaws.services.kendra.model.Document; -import com.amazonaws.services.kendra.model.DocumentAttribute; -import com.amazonaws.services.kendra.model.DocumentAttributeValue; -import com.amazonaws.services.kendra.model.IndexConfigurationSummary; -import com.amazonaws.services.kendra.model.ListDataSourcesRequest; -import com.amazonaws.services.kendra.model.ListDataSourcesResult; -import com.amazonaws.services.kendra.model.ListIndicesRequest; -import com.amazonaws.services.kendra.model.ListIndicesResult; -import com.amazonaws.services.kendra.model.QueryRequest; -import com.amazonaws.services.kendra.model.QueryResultItem; -import com.amazonaws.services.kendra.model.QueryResultType; -import com.amazonaws.services.kendra.model.ScoreConfidence; -import com.amazonaws.services.kendra.model.StartDataSourceSyncJobRequest; -import com.amazonaws.services.kendra.model.StartDataSourceSyncJobResult; -import com.amazonaws.services.kendra.model.StopDataSourceSyncJobRequest; -import com.amazonaws.services.kendra.model.ThrottlingException; -import com.amazonaws.util.IOUtils; -import com.google.gson.Gson; -import com.google.gson.GsonBuilder; -import com.google.gson.reflect.TypeToken; +import java.io.IOException; +import java.io.InputStream; +import java.io.InputStreamReader; +import java.lang.reflect.Type; +import java.nio.ByteBuffer; +import java.nio.charset.StandardCharsets; +import java.util.*; + +import static java.lang.String.format; /** * 
Search provider that implements {link SearchProvider} using AWS Kendra for @@ -98,473 +69,474 @@ import com.google.gson.reflect.TypeToken; */ public class KendraSearchProvider implements SearchProvider { - private static final Logger LOG = Logger.getLogger(KendraSearchProvider.class); - private Engine engine; - private Properties properties; - private Map<String, Object> contentTypes; - private AWSkendra kendra; - private String indexName; - private String indexId; - private String dataSourceName; - private String dataSourceId; - - private List<Page> updates = Collections.synchronizedList(new ArrayList<>()); - - private static final String PROP_KENDRA_INDEX_NAME = "jspwiki.kendra.indexName"; - private static final String PROP_KENDRA_DATA_SOURCE_NAME = "jspwiki.kendra.dataSourceName"; - private static final String PROP_KENDRA_INDEXDELAY = "jspwiki.kendra.indexdelay"; - private static final String PROP_KENDRA_INITIALDELAY = "jspwiki.kendra.initialdelay"; - - public KendraSearchProvider() { - } - - /** - * {@inheritDoc} - */ - @Override - public void initialize(Engine engine, Properties properties) throws NoRequiredPropertyException, IOException { - this.engine = engine; - this.properties = properties; - this.contentTypes = getContentTypes(); - - setKendra(buildClient()); - - this.indexName = TextUtil.getRequiredProperty(this.properties, PROP_KENDRA_INDEX_NAME); - this.dataSourceName = TextUtil.getRequiredProperty(this.properties, PROP_KENDRA_DATA_SOURCE_NAME); - int initialDelay = TextUtil.getIntegerProperty(this.properties, PROP_KENDRA_INITIALDELAY, - KendraUpdater.INITIAL_DELAY); - int indexDelay = TextUtil.getIntegerProperty(this.properties, PROP_KENDRA_INDEXDELAY, KendraUpdater.INDEX_DELAY); - - // Start the Kendra update thread, which waits first for a little while - // before starting to go through the "pages that need updating". 
- if (initialDelay >= 0) { - KendraUpdater updater = new KendraUpdater(engine, this, initialDelay, indexDelay); - updater.start(); - } - } - - private Map<String, Object> getContentTypes() { - Gson gson = new GsonBuilder().create(); - try (InputStream in = KendraSearchProvider.class.getResourceAsStream("content_types.json")) { - if (in != null) { - Type collectionType = new TypeToken<HashMap<String, Object>>(){}.getType(); - return gson.fromJson(new InputStreamReader(in), collectionType); - } - } catch (IOException e) { - LOG.error(format("Unable to load default propertyfile 'content_types.json': %s", e.getMessage()), e); + private static final Logger LOG = Logger.getLogger( KendraSearchProvider.class ); + private Engine engine; + private Properties properties; + private Map< String, Object > contentTypes; + private AWSkendra kendra; + private String indexName; + private String indexId; + private String dataSourceName; + private String dataSourceId; + + private final List< Page > updates = Collections.synchronizedList( new ArrayList<>() ); + + private static final String PROP_KENDRA_INDEX_NAME = "jspwiki.kendra.indexName"; + private static final String PROP_KENDRA_DATA_SOURCE_NAME = "jspwiki.kendra.dataSourceName"; + private static final String PROP_KENDRA_INDEXDELAY = "jspwiki.kendra.indexdelay"; + private static final String PROP_KENDRA_INITIALDELAY = "jspwiki.kendra.initialdelay"; + + public KendraSearchProvider() { } - return null; - } - - /** - * {@inheritDoc} - */ - @Override - public String getProviderInfo() { - return "KendraSearchProvider"; - } - - /** - * {@inheritDoc} - */ - @Override - public void pageRemoved(Page page) { - String pageName = page.getName(); - BatchDeleteDocumentRequest request = new BatchDeleteDocumentRequest().withIndexId(indexId) - .withDocumentIdList(pageName); - BatchDeleteDocumentResult result = getKendra().batchDeleteDocument(request); - if (result.getFailedDocuments().size() == 0) { - LOG.debug(format("Page '%s' was removed from 
index", pageName)); - } else { - LOG.error(format("Failed to remove Page '%s' from index", pageName)); + + /** + * {@inheritDoc} + */ + @Override + public void initialize( final Engine engine, final Properties properties ) throws NoRequiredPropertyException, IOException { + this.engine = engine; + this.properties = properties; + this.contentTypes = getContentTypes(); + + setKendra( buildClient() ); + + this.indexName = TextUtil.getRequiredProperty( this.properties, PROP_KENDRA_INDEX_NAME ); + this.dataSourceName = TextUtil.getRequiredProperty( this.properties, PROP_KENDRA_DATA_SOURCE_NAME ); + final int initialDelay = TextUtil.getIntegerProperty( this.properties, PROP_KENDRA_INITIALDELAY, + KendraUpdater.INITIAL_DELAY ); + final int indexDelay = TextUtil.getIntegerProperty( this.properties, PROP_KENDRA_INDEXDELAY, KendraUpdater.INDEX_DELAY ); + + // Start the Kendra update thread, which waits first for a little while + // before starting to go through the "pages that need updating". + if ( initialDelay >= 0 ) { + final KendraUpdater updater = new KendraUpdater( engine, this, initialDelay, indexDelay ); + updater.start(); + } } - } - - /** - * {@inheritDoc} - */ - @Override - public void reindexPage(Page page) { - if (page != null) { - updates.add(page); - LOG.debug(format("Scheduling page '%s' for indexing ...", page.getName())); + + private Map< String, Object > getContentTypes() { + final Gson gson = new GsonBuilder().create(); + try ( final InputStream in = KendraSearchProvider.class.getResourceAsStream( "content_types.json" ) ) { + if ( in != null ) { + final Type collectionType = new TypeToken< HashMap< String, Object > >() { + }.getType(); + return gson.fromJson( new InputStreamReader( in ), collectionType ); + } + } catch ( final IOException e ) { + LOG.error( format( "Unable to load default propertyfile 'content_types.json': %s", e.getMessage() ), e ); + } + return null; } - } - - /** - * {@inheritDoc} - */ - @Override - public Collection<SearchResult> 
findPages(String query, Context wikiContext) throws ProviderException, IOException { - QueryRequest request = new QueryRequest().withIndexId(indexId).withQueryText(query); - List<QueryResultItem> items = null; - try { - items = getKendra().query(request).getResultItems(); - } catch (ThrottlingException e) { - LOG.error(format("ThrottlingException. Skipping...")); - return new ArrayList<>(); + + /** + * {@inheritDoc} + */ + @Override + public String getProviderInfo() { + return "KendraSearchProvider"; } - List<SearchResult> searchResults = new ArrayList<>(items.size()); - AuthorizationManager mgr = engine.getManager(AuthorizationManager.class); - - for (QueryResultItem item : items) { - switch (QueryResultType.fromValue(item.getType())) { - case DOCUMENT: - String documentId = item.getDocumentId(); - String documentExcerpt = item.getDocumentExcerpt().getText(); - String scoreConfidence = item.getScoreAttributes().getScoreConfidence(); - Page page = this.engine.getManager(PageManager.class).getPage(documentId, PageProvider.LATEST_VERSION); - if (page != null) { - PagePermission pp = new PagePermission(page, PagePermission.VIEW_ACTION); - if (mgr.checkPermission(wikiContext.getWikiSession(), pp)) { - SearchResult searchResult = new SearchResultImpl(page, confidence2score(scoreConfidence), - new String[] { documentExcerpt }); - searchResults.add(searchResult); - } else { - LOG.error(format("Page '%s' is not accessible", documentId)); - } + + /** + * {@inheritDoc} + */ + @Override + public void pageRemoved( final Page page ) { + final String pageName = page.getName(); + final BatchDeleteDocumentRequest request = new BatchDeleteDocumentRequest().withIndexId( indexId ) + .withDocumentIdList( pageName ); + final BatchDeleteDocumentResult result = getKendra().batchDeleteDocument( request ); + if ( result.getFailedDocuments().size() == 0 ) { + LOG.debug( format( "Page '%s' was removed from index", pageName ) ); } else { - LOG.error( - format("Kendra found a result page '%s' 
that could not be loaded, removing from index", documentId)); - pageRemoved(Wiki.contents().page(this.engine, documentId)); + LOG.error( format( "Failed to remove Page '%s' from index", pageName ) ); } - break; - default: - LOG.error(format("Unknown query result type: %s", item.getType())); - } } - return searchResults; - } - - /** - * This method initialize the AWS Kendra Index and Datasources to be used. - * - * @throws InterruptedException - */ - public void initializeIndexAndDataSource() throws InterruptedException { - this.indexId = getIndexId(indexName); - if (this.indexId == null) { - String message = format("Index '%s' does not exists", indexName); - LOG.error(message); - throw new IllegalArgumentException(message); - } - this.dataSourceId = getDatasourceId(this.indexId, dataSourceName); - if (this.dataSourceId == null) { - String message = format("Datasource '%s' does not exists in index %s", dataSourceName, indexName); - LOG.error(message); - throw new IllegalArgumentException(message); - } - } - - /** - * Given an Kendra's Index name, returns the corresponding Index Id, or - * {@code null} if it does not exists - * - * @param indexName the name of the index to look up - * @return the index id or {@code null} - */ - private String getIndexId(String indexName) { - ListIndicesRequest request = new ListIndicesRequest(); - ListIndicesResult result = getKendra().listIndices(request); - String nextToken = ""; - while (nextToken != null) { - List<IndexConfigurationSummary> items = result.getIndexConfigurationSummaryItems(); - if (items == null || items.isEmpty()) { - return null; - } - for (IndexConfigurationSummary item : items) { - if (StringUtils.equals(item.getName(), indexName)) { - return item.getId(); + + /** + * {@inheritDoc} + */ + @Override + public void reindexPage( final Page page ) { + if ( page != null ) { + updates.add( page ); + LOG.debug( format( "Scheduling page '%s' for indexing ...", page.getName() ) ); } - } - nextToken = 
result.getNextToken(); - request = new ListIndicesRequest().withNextToken(result.getNextToken()); - result = getKendra().listIndices(request); } - return null; - } - - /** - * Given an Kendra's Datasource name, returns the corresponding Datasource Id, - * or {@code null} if it does not exists - * - * @param dataSourceName the name of the datasource to look up - * @return the datasource id or {@code null} - */ - private String getDatasourceId(String indexId, String dataSourceName) { - ListDataSourcesRequest request = new ListDataSourcesRequest().withIndexId(indexId); - ListDataSourcesResult result = getKendra().listDataSources(request); - String nextToken = ""; - while (nextToken != null) { - List<DataSourceSummary> items = result.getSummaryItems(); - if (items == null || items.isEmpty()) { - return null; - } - - for (DataSourceSummary item : items) { - if (StringUtils.equals(item.getName(), dataSourceName)) { - return item.getId(); + + /** + * {@inheritDoc} + */ + @Override + public Collection< SearchResult > findPages( final String query, final Context wikiContext ) throws ProviderException, IOException { + final QueryRequest request = new QueryRequest().withIndexId( indexId ).withQueryText( query ); + final List< QueryResultItem > items; + try { + items = getKendra().query( request ).getResultItems(); + } catch ( final ThrottlingException e ) { + LOG.error( "ThrottlingException. Skipping..." 
); + return new ArrayList<>(); } - } - nextToken = result.getNextToken(); - request = new ListDataSourcesRequest().withNextToken(result.getNextToken()); - result = getKendra().listDataSources(request); + final List< SearchResult > searchResults = new ArrayList<>( items.size() ); + final AuthorizationManager mgr = engine.getManager( AuthorizationManager.class ); + + for ( final QueryResultItem item : items ) { + switch ( QueryResultType.fromValue( item.getType() ) ) { + case DOCUMENT: + final String documentId = item.getDocumentId(); + final String documentExcerpt = item.getDocumentExcerpt().getText(); + final String scoreConfidence = item.getScoreAttributes().getScoreConfidence(); + final Page page = this.engine.getManager( PageManager.class ).getPage( documentId, PageProvider.LATEST_VERSION ); + if ( page != null ) { + final PagePermission pp = new PagePermission( page, PagePermission.VIEW_ACTION ); + if ( mgr.checkPermission( wikiContext.getWikiSession(), pp ) ) { + final SearchResult searchResult = new SearchResultImpl( page, confidence2score( scoreConfidence ), + new String[]{ documentExcerpt } ); + searchResults.add( searchResult ); + } else { + LOG.error( format( "Page '%s' is not accessible", documentId ) ); + } + } else { + LOG.error( + format( "Kendra found a result page '%s' that could not be loaded, removing from index", documentId ) ); + pageRemoved( Wiki.contents().page( this.engine, documentId ) ); + } + break; + default: + LOG.error( format( "Unknown query result type: %s", item.getType() ) ); + } + } + return searchResults; } - return null; - } - - /* - * Converts a SCORE Confidence from Kendra to an "equivalent" integer score - */ - private int confidence2score(String scoreConfidence) { - switch (ScoreConfidence.fromValue(scoreConfidence)) { - case VERY_HIGH: - return 100; - case HIGH: - return 75; - case MEDIUM: - return 50; - case LOW: - return 25; - default: - return 0; + + /** + * This method initialize the AWS Kendra Index and Datasources to 
be used. + */ + public void initializeIndexAndDataSource() { + this.indexId = getIndexId( indexName ); + if ( this.indexId == null ) { + final String message = format( "Index '%s' does not exist", indexName ); + LOG.error( message ); + throw new IllegalArgumentException( message ); + } + this.dataSourceId = getDatasourceId( this.indexId, dataSourceName ); + if ( this.dataSourceId == null ) { + final String message = format( "Datasource '%s' does not exist in index %s", dataSourceName, indexName ); + LOG.error( message ); + throw new IllegalArgumentException( message ); + } } - } - - /** - * This method re-index all the pages found in the Wiki. It is mainly used at - * startup. - * - * @throws IOException in case some page can not be read - */ - private void doFullReindex() throws IOException { - try { - Collection<Page> pages = engine.getManager(PageManager.class).getAllPages(); - if (pages.isEmpty()) { - return; - } - LOG.debug(format("Indexing all %d pages. Please wait ...", pages.size())); - String executionId = startExecution(); - for (Page page : pages) { - // Since I do not want to handle the size limit - // (https://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/BatchPutDocument) - // uploading documents one at a time - indexOnePage(page, executionId); - } - } catch (ProviderException e) { - LOG.error(e.getMessage()); - throw new IOException(e); - } finally { - stopExecution(); + + /** + * Given an Kendra's Index name, returns the corresponding Index Id, or + * {@code null} if it does not exists + * + * @param indexName the name of the index to look up + * @return the index id or {@code null} + */ + private String getIndexId( final String indexName ) { + ListIndicesRequest request = new ListIndicesRequest(); + ListIndicesResult result = getKendra().listIndices( request ); + String nextToken = ""; + while ( nextToken != null ) { + final List< IndexConfigurationSummary > items = result.getIndexConfigurationSummaryItems(); + if ( items == null || 
items.isEmpty() ) { + return null; + } + for ( final IndexConfigurationSummary item : items ) { + if ( StringUtils.equals( item.getName(), indexName ) ) { + return item.getId(); + } + } + nextToken = result.getNextToken(); + request = new ListIndicesRequest().withNextToken( result.getNextToken() ); + result = getKendra().listIndices( request ); + } + return null; } - } - - /** - * This method re-index all pages marked as updated. It is used to periodically - * index pages that have been modified - */ - private void doPartialReindex() { - if (updates.isEmpty()) { - return; + + /** + * Given an Kendra's Datasource name, returns the corresponding Datasource Id, + * or {@code null} if it does not exists + * + * @param dataSourceName the name of the datasource to look up + * @return the datasource id or {@code null} + */ + private String getDatasourceId( final String indexId, final String dataSourceName ) { + ListDataSourcesRequest request = new ListDataSourcesRequest().withIndexId( indexId ); + ListDataSourcesResult result = getKendra().listDataSources( request ); + String nextToken = ""; + while ( nextToken != null ) { + final List< DataSourceSummary > items = result.getSummaryItems(); + if ( items == null || items.isEmpty() ) { + return null; + } + + for ( final DataSourceSummary item : items ) { + if ( StringUtils.equals( item.getName(), dataSourceName ) ) { + return item.getId(); + } + } + nextToken = result.getNextToken(); + request = new ListDataSourcesRequest().withNextToken( result.getNextToken() ); + result = getKendra().listDataSources( request ); + } + return null; } - LOG.debug(format("Indexing updated pages. 
Please wait ...")); - String executionId = startExecution(); - synchronized (updates) { - try { - while (updates.size() > 0) { - indexOnePage(updates.remove(0), executionId); + + /* + * Converts a SCORE Confidence from Kendra to an "equivalent" integer score + */ + private int confidence2score( final String scoreConfidence ) { + switch ( ScoreConfidence.fromValue( scoreConfidence ) ) { + case VERY_HIGH: + return 100; + case HIGH: + return 75; + case MEDIUM: + return 50; + case LOW: + return 25; + default: + return 0; } - } finally { - stopExecution(); - } } - } - - /** - * Returns an ExecutiuonId that is required to keep track of the modifed - * documents - * @return The execution id - */ - private String startExecution() { - StartDataSourceSyncJobRequest request = new StartDataSourceSyncJobRequest().withIndexId(indexId) - .withId(dataSourceId); - StartDataSourceSyncJobResult result = getKendra().startDataSourceSyncJob(request); - return result.getExecutionId(); - } - - /** - * Stop the execution for the given index Id and DataSource Id. 
- */ - private void stopExecution() { - StopDataSourceSyncJobRequest request = new StopDataSourceSyncJobRequest().withIndexId(indexId).withId(dataSourceId); - getKendra().stopDataSourceSyncJob(request); - } - - /** - * Index on single {@link Page} into the Kendra Index - * @param page the {@link Page} to index - * @param executionId The Execution Id - */ - private void indexOnePage(Page page, String executionId) { - String pageName = page.getName(); - try { - Document document = newDocument(page, executionId); - BatchPutDocumentRequest request = new BatchPutDocumentRequest().withIndexId(indexId) - .withDocuments(document); - BatchPutDocumentResult result = getKendra().batchPutDocument(request); - if (result.getFailedDocuments().size() == 0) { - LOG.info(format("Successfully indexed Page '%s' as %s", page.getName(), document.getContentType())); - } else { - for (BatchPutDocumentResponseFailedDocument failedDocument : result.getFailedDocuments()) { - LOG.error(format("Failed to index Page '%s': %s", failedDocument.getId(), failedDocument.getErrorMessage())); + + /** + * This method re-index all the pages found in the Wiki. It is mainly used at + * startup. + * + * @throws IOException in case some page can not be read + */ + private void doFullReindex() throws IOException { + try { + final Collection< Page > pages = engine.getManager( PageManager.class ).getAllPages(); + if ( pages.isEmpty() ) { + return; + } + LOG.debug( format( "Indexing all %d pages. 
Please wait ...", pages.size() ) ); + final String executionId = startExecution(); + for ( final Page page : pages ) { + // Since I do not want to handle the size limit + // (https://docs.aws.amazon.com/goto/WebAPI/kendra-2019-02-03/BatchPutDocument) + // uploading documents one at a time + indexOnePage( page, executionId ); + } + } catch ( final ProviderException e ) { + LOG.error( e.getMessage() ); + throw new IOException( e ); + } finally { + stopExecution(); } - } - } catch (IOException e) { - LOG.error(format("Failed to index Page '%s': %s", pageName, e.getMessage())); } - } - - - /** - * Given a {@link Page}, returns the corresponding Kendra {@link Document}. - * - * @param page the {@link Page} to be indexed - * @param executionId an execution id to identify when the {@link Page} was - * indexed for the last time. - * @return a {@link Document} containing the searchable attributes. - * @throws IOException if the {@link Page}'s {@link Attachment} can not be read. - */ - private Document newDocument(Page page, String executionId) throws IOException { - String pageName = page.getName(); - List<DocumentAttribute> attrs = new ArrayList<>(); - // These 2 are required as per - // https://docs.aws.amazon.com/kendra/latest/dg/data-source-custom.html#custom-required-attributes - attrs.add(newAttribute("_data_source_id", dataSourceId)); - attrs.add(newAttribute("_data_source_sync_job_execution_id", executionId)); - - String title = TextUtil.beautifyString(pageName); - ByteBuffer blob = null; - ContentType contentType = ContentType.PLAIN_TEXT; - if (page instanceof Attachment) { - Attachment attachment = (Attachment) page; - InputStream is = null; - try { - String filename = attachment.getFileName(); - contentType = getContentType(filename); - is = engine.getManager(AttachmentManager.class).getAttachmentStream(attachment); - blob = ByteBuffer.wrap(IOUtils.toByteArray(is)); - } catch (ProviderException e) { - throw new IOException(e); - } finally { - 
IOUtils.closeQuietly(is, null); - } - // contentType should be set to its real value - } else { - String text = engine.getManager(PageManager.class).getPureText(page); - blob = ByteBuffer.wrap(text.getBytes(StandardCharsets.UTF_8)); + + /** + * This method re-index all pages marked as updated. It is used to periodically + * index pages that have been modified + */ + private void doPartialReindex() { + if ( updates.isEmpty() ) { + return; + } + LOG.debug( "Indexing updated pages. Please wait ..." ); + final String executionId = startExecution(); + synchronized ( updates ) { + try { + while ( updates.size() > 0 ) { + indexOnePage( updates.remove( 0 ), executionId ); + } + } finally { + stopExecution(); + } + } } - return new Document().withId(pageName).withTitle(title).withAttributes(attrs).withBlob(blob) - .withContentType(contentType); - } - - private DocumentAttribute newAttribute(String key, String value) { - return new DocumentAttribute().withKey(key).withValue(new DocumentAttributeValue().withStringValue(value)); - } - - @SuppressWarnings("unchecked") - private ContentType getContentType(String filename) { - String extention = FilenameUtils.getExtension(filename); - Map<String, String> ct = (Map<String, String>) this.contentTypes.get("ContentTypes"); - return ContentType.fromValue(ct.getOrDefault(extention, ContentType.PLAIN_TEXT.name())); - } - - /** - * Updater thread that updates Kendra indexes. 
- */ - private static final class KendraUpdater extends WikiBackgroundThread { - protected static final int INDEX_DELAY = 5; - protected static final int INITIAL_DELAY = 10; - private KendraSearchProvider provider; - - private int initialDelay; - - private WatchDog watchdog; - - private KendraUpdater(Engine engine, KendraSearchProvider provider, int initialDelay, int indexDelay) { - super(engine, indexDelay); - this.provider = provider; - this.initialDelay = initialDelay; - setName("JSPWiki Kendra Indexer"); + + /** + * Returns an ExecutiuonId that is required to keep track of the modifed + * documents + * + * @return The execution id + */ + private String startExecution() { + final StartDataSourceSyncJobRequest request = new StartDataSourceSyncJobRequest().withIndexId( indexId ) + .withId( dataSourceId ); + final StartDataSourceSyncJobResult result = getKendra().startDataSourceSyncJob( request ); + return result.getExecutionId(); } - @Override - public void startupTask() throws Exception { - watchdog = WatchDog.getCurrentWatchDog(getEngine()); - try { - Thread.sleep(initialDelay * 1000L); - } catch (InterruptedException e) { - throw new InternalWikiException("Interrupted while waiting to start.", e); - } - watchdog.enterState("Full reindex"); - provider.initializeIndexAndDataSource(); - provider.doFullReindex(); - watchdog.exitState(); + /** + * Stop the execution for the given index Id and DataSource Id. 
+ */ + private void stopExecution() { + final StopDataSourceSyncJobRequest request = new StopDataSourceSyncJobRequest().withIndexId( indexId ).withId( dataSourceId ); + getKendra().stopDataSourceSyncJob( request ); } - @Override - public void backgroundTask() { - watchdog.enterState("Reindexing ...", 60); - provider.doPartialReindex(); - watchdog.exitState(); + /** + * Index on single {@link Page} into the Kendra Index + * + * @param page the {@link Page} to index + * @param executionId The Execution Id + */ + private void indexOnePage( final Page page, final String executionId ) { + final String pageName = page.getName(); + try { + final Document document = newDocument( page, executionId ); + final BatchPutDocumentRequest request = new BatchPutDocumentRequest().withIndexId( indexId ) + .withDocuments( document ); + final BatchPutDocumentResult result = getKendra().batchPutDocument( request ); + if ( result.getFailedDocuments().size() == 0 ) { + LOG.info( format( "Successfully indexed Page '%s' as %s", page.getName(), document.getContentType() ) ); + } else { + for ( final BatchPutDocumentResponseFailedDocument failedDocument : result.getFailedDocuments() ) { + LOG.error( format( "Failed to index Page '%s': %s", failedDocument.getId(), failedDocument.getErrorMessage() ) ); + } + } + } catch ( final IOException e ) { + LOG.error( format( "Failed to index Page '%s': %s", pageName, e.getMessage() ) ); + } } - } - private static class SearchResultImpl implements SearchResult { - private Page page; - private int score; - private String[] contexts; + /** + * Given a {@link Page}, returns the corresponding Kendra {@link Document}. + * + * @param page the {@link Page} to be indexed + * @param executionId an execution id to identify when the {@link Page} was + * indexed for the last time. + * @return a {@link Document} containing the searchable attributes. + * @throws IOException if the {@link Page}'s {@link Attachment} can not be read. 
+ */ + private Document newDocument( final Page page, final String executionId ) throws IOException { + final String pageName = page.getName(); + final List< DocumentAttribute > attrs = new ArrayList<>(); + // These 2 are required as per + // https://docs.aws.amazon.com/kendra/latest/dg/data-source-custom.html#custom-required-attributes + attrs.add( newAttribute( "_data_source_id", dataSourceId ) ); + attrs.add( newAttribute( "_data_source_sync_job_execution_id", executionId ) ); + + final String title = TextUtil.beautifyString( pageName ); + ByteBuffer blob; + ContentType contentType = ContentType.PLAIN_TEXT; + if ( page instanceof Attachment ) { + final Attachment attachment = ( Attachment ) page; + InputStream is = null; + try { + final String filename = attachment.getFileName(); + contentType = getContentType( filename ); + is = engine.getManager( AttachmentManager.class ).getAttachmentStream( attachment ); + blob = ByteBuffer.wrap( IOUtils.toByteArray( is ) ); + } catch ( final ProviderException e ) { + throw new IOException( e ); + } finally { + IOUtils.closeQuietly( is, null ); + } + // contentType should be set to its real value + } else { + final String text = engine.getManager( PageManager.class ).getPureText( page ); + blob = ByteBuffer.wrap( text.getBytes( StandardCharsets.UTF_8 ) ); + } + return new Document().withId( pageName ).withTitle( title ).withAttributes( attrs ).withBlob( blob ) + .withContentType( contentType ); + } - public SearchResultImpl(Page page, int score, String[] contexts) { - this.page = page; - this.score = score; - this.contexts = contexts != null ? 
contexts.clone() : null; + private DocumentAttribute newAttribute( final String key, final String value ) { + return new DocumentAttribute().withKey( key ).withValue( new DocumentAttributeValue().withStringValue( value ) ); } - @Override - public Page getPage() { - return this.page; + @SuppressWarnings( "unchecked" ) + private ContentType getContentType( final String filename ) { + final String extention = FilenameUtils.getExtension( filename ); + final Map< String, String > ct = ( Map< String, String > ) this.contentTypes.get( "ContentTypes" ); + return ContentType.fromValue( ct.getOrDefault( extention, ContentType.PLAIN_TEXT.name() ) ); } - @Override - public int getScore() { - return this.score; + /** + * Updater thread that updates Kendra indexes. + */ + private static final class KendraUpdater extends WikiBackgroundThread { + protected static final int INDEX_DELAY = 5; + protected static final int INITIAL_DELAY = 10; + private final KendraSearchProvider provider; + + private final int initialDelay; + + private WatchDog watchdog; + + private KendraUpdater( final Engine engine, final KendraSearchProvider provider, final int initialDelay, final int indexDelay ) { + super( engine, indexDelay ); + this.provider = provider; + this.initialDelay = initialDelay; + setName( "JSPWiki Kendra Indexer" ); + } + + @Override + public void startupTask() throws Exception { + watchdog = WatchDog.getCurrentWatchDog( getEngine() ); + try { + Thread.sleep( initialDelay * 1000L ); + } catch ( final InterruptedException e ) { + throw new InternalWikiException( "Interrupted while waiting to start.", e ); + } + watchdog.enterState( "Full reindex" ); + provider.initializeIndexAndDataSource(); + provider.doFullReindex(); + watchdog.exitState(); + } + + @Override + public void backgroundTask() { + watchdog.enterState( "Reindexing ...", 60 ); + provider.doPartialReindex(); + watchdog.exitState(); + } } - @Override - public String[] getContexts() { - return this.contexts; + private static 
class SearchResultImpl implements SearchResult { + + private final Page page; + private final int score; + private final String[] contexts; + + public SearchResultImpl( final Page page, final int score, final String[] contexts ) { + this.page = page; + this.score = score; + this.contexts = contexts != null ? contexts.clone() : null; + } + + @Override + public Page getPage() { + return this.page; + } + + @Override + public int getScore() { + return this.score; + } + + @Override + public String[] getContexts() { + return this.contexts; + } } - } - public AWSkendra getKendra() { - return kendra; - } + public AWSkendra getKendra() { + return kendra; + } - public void setKendra(AWSkendra kendra) { - this.kendra = kendra; - } + public void setKendra( final AWSkendra kendra ) { + this.kendra = kendra; + } - protected AWSkendra buildClient() { - return AWSkendraClientBuilder.defaultClient(); - } + protected AWSkendra buildClient() { + return AWSkendraClientBuilder.defaultClient(); + } - public String getIndexName() { - return indexName; - } + public String getIndexName() { + return indexName; + } - public String getDataSourceName() { - return dataSourceName; - } + public String getDataSourceName() { + return dataSourceName; + } } \ No newline at end of file diff --git a/jspwiki-kendra-searchprovider/src/test/java/org/apache/wiki/search/kendra/KendraSearchProviderTest.java b/jspwiki-kendra-searchprovider/src/test/java/org/apache/wiki/search/kendra/KendraSearchProviderTest.java index 9c769ce..3f11e46 100644 --- a/jspwiki-kendra-searchprovider/src/test/java/org/apache/wiki/search/kendra/KendraSearchProviderTest.java +++ b/jspwiki-kendra-searchprovider/src/test/java/org/apache/wiki/search/kendra/KendraSearchProviderTest.java @@ -44,204 +44,212 @@ import static org.mockito.ArgumentMatchers.any; import static org.mockito.Mockito.lenient; import static org.mockito.Mockito.when; -@ExtendWith(MockitoExtension.class) +@ExtendWith( MockitoExtension.class ) public class 
KendraSearchProviderTest { - static final Properties props = TestEngine.getTestProperties(); - static final TestEngine engine = TestEngine.build( props ); - KendraSearchProvider searchProvider; - - @Mock - AWSkendra kendraMock; - - @BeforeEach - void setUp( final TestInfo testInfo ) throws Exception { - TestEngine.emptyWorkDir(props); - - // before each test I setup the Kendra Client - searchProvider = new KendraSearchProvider() { - @Override - protected AWSkendra buildClient() { - return kendraMock; - } - }; - searchProvider.initialize( engine, props ); - final Method m = testInfo.getTestMethod().get(); - final String indexName; - final String dataSourceName; - if (m.isAnnotationPresent(WithKendra.class)) { - final WithKendra withKendra = m.getAnnotation(WithKendra.class); - indexName = withKendra.indexName(); - dataSourceName = withKendra.dataSourceName(); - setUpKendraMock(indexName, dataSourceName); - searchProvider.setKendra(kendraMock); - if (StringUtils.isNotBlank(indexName) && StringUtils.isNotBlank(dataSourceName)) { - searchProvider.initializeIndexAndDataSource(); - } - } else { - setUpKendraMock(null, null); - searchProvider.setKendra(kendraMock); + static final Properties props = TestEngine.getTestProperties(); + static final TestEngine engine = TestEngine.build( props ); + KendraSearchProvider searchProvider; + + @Mock + AWSkendra kendraMock; + + @BeforeEach + void setUp( final TestInfo testInfo ) throws Exception { + TestEngine.emptyWorkDir( props ); + + // before each test I setup the Kendra Client + searchProvider = new KendraSearchProvider() { + @Override + protected AWSkendra buildClient() { + return kendraMock; + } + }; + searchProvider.initialize( engine, props ); + final Method m = testInfo.getTestMethod().get(); + final String indexName; + final String dataSourceName; + if ( m.isAnnotationPresent( WithKendra.class ) ) { + final WithKendra withKendra = m.getAnnotation( WithKendra.class ); + indexName = withKendra.indexName(); + dataSourceName = 
withKendra.dataSourceName(); + setUpKendraMock( indexName, dataSourceName ); + searchProvider.setKendra( kendraMock ); + if ( StringUtils.isNotBlank( indexName ) && StringUtils.isNotBlank( dataSourceName ) ) { + searchProvider.initializeIndexAndDataSource(); + } + } else { + setUpKendraMock( null, null ); + searchProvider.setKendra( kendraMock ); + } + // And possibly the pages that will be present in the wiki + if ( m.isAnnotationPresent( WithPages.class ) ) { + final WithPages withPages = m.getAnnotation( WithPages.class ); + addPages( withPages.value() ); + } + if ( m.isAnnotationPresent( WithPage.class ) ) { + final WithPage withPage = m.getAnnotation( WithPage.class ); + addPages( withPage ); + } + // and the corresponding search results + if ( m.isAnnotationPresent( WithResults.class ) ) { + final WithResults withResults = m.getAnnotation( WithResults.class ); + addResults( withResults.value() ); + } + if ( m.isAnnotationPresent( WithResult.class ) ) { + final WithResult withResult = m.getAnnotation( WithResult.class ); + addResults( withResult ); + } + } + + @AfterEach + void tearDown( final TestInfo testInfo ) { + final Method m = testInfo.getTestMethod().get(); + // And possibly the pages that will be present in the wiki + if ( m.isAnnotationPresent( WithPage.class ) ) { + final WithPage withPage = m.getAnnotation( WithPage.class ); + engine.deleteTestPage( withPage.name() ); + } + } + + void debugSearchResults( final Collection< SearchResult > res ) { + res.forEach( next -> { + System.out.println( "page: " + next.getPage() ); + for ( final String s : next.getContexts() ) { + System.out.println( "snippet: " + s ); + } + } ); + } + + Callable< Boolean > findsResultsFor( final Collection< SearchResult > res, final String text ) { + return () -> { + final MockHttpServletRequest request = engine.newHttpRequest(); + final Context ctx = Wiki.context().create( engine, request, ContextEnum.PAGE_EDIT.getRequestContext() ); + final Collection< SearchResult > 
searchResults = searchProvider.findPages( text, ctx ); + if ( searchResults != null && !searchResults.isEmpty() ) { + debugSearchResults( searchResults ); + res.addAll( searchResults ); + return true; + } + return false; + }; } - // And possibly the pages that will be present in the wiki - if (m.isAnnotationPresent(WithPages.class)) { - final WithPages withPages = m.getAnnotation(WithPages.class); - addPages(withPages.value()); + + @Test + public void testInvalidIndexName() { + // IndexName is invalid... + Assertions.assertThrows( IllegalArgumentException.class, () -> searchProvider.initializeIndexAndDataSource() ); } - if (m.isAnnotationPresent(WithPage.class)) { - final WithPage withPage = m.getAnnotation(WithPage.class); - addPages(withPage); + + @Test + @WithKendra( indexName = "JSPWikiIndex", dataSourceName = "" ) + public void testInvalidDataSourceName() { + // IndexName is invalid... + Assertions.assertThrows( IllegalArgumentException.class, () -> searchProvider.initializeIndexAndDataSource() ); } - // and the corresponding search results - if (m.isAnnotationPresent(WithResults.class)) { - final WithResults withResults = m.getAnnotation(WithResults.class); - addResults(withResults.value()); + + @Test + @WithKendra( indexName = "JSPWikiIndex", dataSourceName = "JSPWikiDataSource" ) + @WithPage( name = "TestPage", + text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", + attachments = {} ) + public void testSearchNoResult() throws Exception { + final Collection< SearchResult > res = new ArrayList<>(); + Assertions.assertFalse( findsResultsFor( res, "this text does not exists" ).call() ); + Assertions.assertEquals( 0, res.size(), "has result. 
none were expected" ); } - if(m.isAnnotationPresent(WithResult.class)) { - final WithResult withResult = m.getAnnotation(WithResult.class); - addResults(withResult); + + @Test + @WithKendra( indexName = "JSPWikiIndex", dataSourceName = "JSPWikiDataSource" ) + @WithPage( name = "TestPage", + text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", + attachments = {} ) + @WithResult( name = "TestPage", text = "mankind", scoreConfidence = ScoreConfidence.VERY_HIGH ) + public void testSimpleSearch() throws Exception { + final Collection< SearchResult > searchResults = new ArrayList<>(); + Assertions.assertTrue( findsResultsFor( searchResults, "mankind" ).call() ); + Assertions.assertEquals( 1, searchResults.size(), "no pages. one was expectd" ); + Assertions.assertEquals( "TestPage", searchResults.iterator().next().getPage().getName(), "the page TestPage was expected" ); } - } - - @AfterEach - void tearDown( final TestInfo testInfo ) { - final Method m = testInfo.getTestMethod().get(); - // And possibly the pages that will be present in the wiki - if (m.isAnnotationPresent(WithPage.class)) { - final WithPage withPage = m.getAnnotation(WithPage.class); - engine.deleteTestPage(withPage.name()); + + @Test + @WithKendra( indexName = "JSPWikiIndex", dataSourceName = "JSPWikiDataSource" ) + @WithPage( name = "TestPage", + text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", + attachments = {} ) + @WithPage( name = "TestPage2", + text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", + attachments = {} ) + @WithResult( name = "TestPage", text = "mankind", scoreConfidence = ScoreConfidence.VERY_HIGH ) + @WithResult( name = "TestPage2", text = "mankind", scoreConfidence = ScoreConfidence.VERY_HIGH ) + public void testSimpleSearch2() throws Exception { + final Collection< SearchResult > searchResults = new ArrayList<>(); + Assertions.assertTrue( findsResultsFor( 
searchResults, "mankind" ).call() ); + Assertions.assertEquals( 2, searchResults.size(), "2 pages were expectd" ); + final Iterator< SearchResult > i = searchResults.iterator(); + Assertions.assertEquals( "TestPage", i.next().getPage().getName(), "the page TestPage was expected" ); + Assertions.assertEquals( "TestPage2", i.next().getPage().getName(), "the page TestPage2 was expected" ); } - } - - void debugSearchResults(final Collection<SearchResult> res) { - res.forEach(next -> { - System.out.println("page: " + next.getPage()); - for (final String s : next.getContexts()) { - System.out.println("snippet: " + s); - } - }); - } - - Callable<Boolean> findsResultsFor(final Collection<SearchResult> res, final String text) { - return () -> { - final MockHttpServletRequest request = engine.newHttpRequest(); - final Context ctx = Wiki.context().create(engine, request, ContextEnum.PAGE_EDIT.getRequestContext()); - final Collection<SearchResult> searchResults = searchProvider.findPages(text, ctx); - if (searchResults != null && !searchResults.isEmpty()) { - debugSearchResults(searchResults); - res.addAll(searchResults); - return true; - } - return false; - }; - } - - @Test - public void testInvalidIndexName() { - // IndexName is invalid... - Assertions.assertThrows( IllegalArgumentException.class, () -> searchProvider.initializeIndexAndDataSource() ); - } - - @Test - @WithKendra(indexName = "JSPWikiIndex", dataSourceName = "") - public void testInvalidDataSourceName() { - // IndexName is invalid... 
- Assertions.assertThrows(IllegalArgumentException.class, () -> searchProvider.initializeIndexAndDataSource() ); - } - - @Test - @WithKendra(indexName = "JSPWikiIndex", dataSourceName = "JSPWikiDataSource") - @WithPage(name = "TestPage", text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", attachments = {}) - public void testSearchNoResult() throws Exception { - final Collection<SearchResult> res = new ArrayList<>(); - Assertions.assertFalse(findsResultsFor(res, "this text does not exists").call()); - Assertions.assertEquals(0, res.size(), "has result. none were expected"); - } - - @Test - @WithKendra(indexName = "JSPWikiIndex", dataSourceName = "JSPWikiDataSource") - @WithPage(name = "TestPage", text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", attachments = {}) - @WithResult(name = "TestPage", text = "mankind", scoreConfidence = ScoreConfidence.VERY_HIGH) - public void testSimpleSearch() throws Exception { - final Collection<SearchResult> searchResults = new ArrayList<>(); - Assertions.assertTrue(findsResultsFor(searchResults, "mankind").call()); - Assertions.assertEquals(1, searchResults.size(), "no pages. 
one was expectd"); - Assertions.assertEquals("TestPage", searchResults.iterator().next().getPage().getName(), "the page TestPage was expected"); - } - - @Test - @WithKendra(indexName = "JSPWikiIndex", dataSourceName = "JSPWikiDataSource") - @WithPage(name = "TestPage", text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", attachments = {}) - @WithPage(name = "TestPage2", text = "It was the dawn of the third age of mankind, ten years after the Earth-Minbari War.", attachments = {}) - @WithResult(name = "TestPage", text = "mankind", scoreConfidence = ScoreConfidence.VERY_HIGH) - @WithResult(name = "TestPage2", text = "mankind", scoreConfidence = ScoreConfidence.VERY_HIGH) - public void testSimpleSearch2() throws Exception { - final Collection<SearchResult> searchResults = new ArrayList<>(); - Assertions.assertTrue(findsResultsFor(searchResults, "mankind").call()); - Assertions.assertEquals(2, searchResults.size(), "2 pages were expectd"); - final Iterator<SearchResult> i = searchResults.iterator(); - Assertions.assertEquals("TestPage", i.next().getPage().getName(), "the page TestPage was expected"); - Assertions.assertEquals("TestPage2", i.next().getPage().getName(), "the page TestPage2 was expected"); - } - - private void setUpKendraMock(final String indexName, final String dataSourceName) { - final String indexId = UUID.randomUUID().toString(); - final String dataSourceId = UUID.randomUUID().toString(); - when(kendraMock.listIndices(any(ListIndicesRequest.class))).then( ( Answer< ListIndicesResult > ) invocation -> { - ListIndicesResult result = new ListIndicesResult(); - if (StringUtils.isNotBlank(indexName)) { - result.withIndexConfigurationSummaryItems(new IndexConfigurationSummary().withId(indexId).withName(indexName)); - } - return result; - } ); - lenient().when(kendraMock.listDataSources(any(ListDataSourcesRequest.class))) - .then( ( Answer< ListDataSourcesResult > ) invocation -> { - final ListDataSourcesResult result 
= new ListDataSourcesResult(); - if (StringUtils.isNotBlank(dataSourceName)) { - result.withSummaryItems(new DataSourceSummary().withId(dataSourceId).withName(dataSourceName)); + + private void setUpKendraMock( final String indexName, final String dataSourceName ) { + final String indexId = UUID.randomUUID().toString(); + final String dataSourceId = UUID.randomUUID().toString(); + when( kendraMock.listIndices( any( ListIndicesRequest.class ) ) ).then( ( Answer< ListIndicesResult > ) invocation -> { + final ListIndicesResult result = new ListIndicesResult(); + if ( StringUtils.isNotBlank( indexName ) ) { + result.withIndexConfigurationSummaryItems( new IndexConfigurationSummary().withId( indexId ).withName( indexName ) ); } return result; } ); - lenient().when(kendraMock.startDataSourceSyncJob(any(StartDataSourceSyncJobRequest.class))) - .then( ( Answer< StartDataSourceSyncJobResult > ) invocation -> new StartDataSourceSyncJobResult().withExecutionId("executionId") ); - lenient().when(kendraMock.batchPutDocument(any(BatchPutDocumentRequest.class))) - .then( ( Answer< BatchPutDocumentResult > ) invocation -> { - final BatchPutDocumentResult result = new BatchPutDocumentResult(); - result.withFailedDocuments(new ArrayList<>()); + lenient().when( kendraMock.listDataSources( any( ListDataSourcesRequest.class ) ) ) + .then( ( Answer< ListDataSourcesResult > ) invocation -> { + final ListDataSourcesResult result = new ListDataSourcesResult(); + if ( StringUtils.isNotBlank( dataSourceName ) ) { + result.withSummaryItems( new DataSourceSummary().withId( dataSourceId ).withName( dataSourceName ) ); + } + return result; + } ); + lenient().when( kendraMock.startDataSourceSyncJob( any( StartDataSourceSyncJobRequest.class ) ) ) + .then( ( Answer< StartDataSourceSyncJobResult > ) invocation -> new StartDataSourceSyncJobResult().withExecutionId( "executionId" ) ); + lenient().when( kendraMock.batchPutDocument( any( BatchPutDocumentRequest.class ) ) ) + .then( ( Answer< 
BatchPutDocumentResult > ) invocation -> { + final BatchPutDocumentResult result = new BatchPutDocumentResult(); + result.withFailedDocuments( new ArrayList<>() ); + return result; + } ); + lenient().when( kendraMock.query( any( QueryRequest.class ) ) ).then( ( Answer< QueryResult > ) invocation -> { + final QueryResult result = new QueryResult(); + result.withResultItems( new ArrayList<>() ); return result; } ); - lenient().when(kendraMock.query(any(QueryRequest.class))).then( ( Answer< QueryResult > ) invocation -> { - QueryResult result = new QueryResult(); - result.withResultItems(new ArrayList<>()); - return result; - } ); - } - - private void addPages(final WithPage... withPages) throws WikiException, IOException, URISyntaxException { - for(final WithPage withPage : withPages ) { - final String name = withPage.name(); - final String text = withPage.text(); - final String[] attachements = withPage.attachments(); - engine.saveText(name, text); - final ClassLoader classLoader = KendraSearchProviderTest.class.getClassLoader(); - for (final String attachement : attachements) { - final byte[] content = Files.readAllBytes(Paths.get(classLoader.getResource(attachement).toURI())); - engine.addAttachment(name, attachement, content); - } - } - } - - private void addResults(final WithResult... 
withResults) { - when(kendraMock.query(any(QueryRequest.class))).then( ( Answer< QueryResult > ) invocation -> { - final List<QueryResultItem> items = new ArrayList<>(); - for (final WithResult withResult : withResults) { - final QueryResultItem item = new QueryResultItem().withType(QueryResultType.DOCUMENT); - item.withDocumentId(withResult.name()); - item.withDocumentTitle(new TextWithHighlights().withText(withResult.name())); - item.withDocumentExcerpt(new TextWithHighlights().withText(withResult.text())); - item.withScoreAttributes(new ScoreAttributes().withScoreConfidence(withResult.scoreConfidence())); - items.add(item); - } - return new QueryResult().withResultItems(items); - } ); - } + } + + private void addPages( final WithPage... withPages ) throws WikiException, IOException, URISyntaxException { + for ( final WithPage withPage : withPages ) { + final String name = withPage.name(); + final String text = withPage.text(); + final String[] attachements = withPage.attachments(); + engine.saveText( name, text ); + final ClassLoader classLoader = KendraSearchProviderTest.class.getClassLoader(); + for ( final String attachement : attachements ) { + final byte[] content = Files.readAllBytes( Paths.get( classLoader.getResource( attachement ).toURI() ) ); + engine.addAttachment( name, attachement, content ); + } + } + } + + private void addResults( final WithResult... 
withResults ) { + when( kendraMock.query( any( QueryRequest.class ) ) ).then( ( Answer< QueryResult > ) invocation -> { + final List< QueryResultItem > items = new ArrayList<>(); + for ( final WithResult withResult : withResults ) { + final QueryResultItem item = new QueryResultItem().withType( QueryResultType.DOCUMENT ); + item.withDocumentId( withResult.name() ); + item.withDocumentTitle( new TextWithHighlights().withText( withResult.name() ) ); + item.withDocumentExcerpt( new TextWithHighlights().withText( withResult.text() ) ); + item.withScoreAttributes( new ScoreAttributes().withScoreConfidence( withResult.scoreConfidence() ) ); + items.add( item ); + } + return new QueryResult().withResultItems( items ); + } ); + } } \ No newline at end of file
