JAMES-2013 Add metric collection to Tika external service
Project: http://git-wip-us.apache.org/repos/asf/james-project/repo Commit: http://git-wip-us.apache.org/repos/asf/james-project/commit/356add84 Tree: http://git-wip-us.apache.org/repos/asf/james-project/tree/356add84 Diff: http://git-wip-us.apache.org/repos/asf/james-project/diff/356add84 Branch: refs/heads/master Commit: 356add848b4f4a00f81ff99c6b254e8cf0c7f96b Parents: 4bf777b Author: benwa <btell...@linagora.com> Authored: Mon Mar 26 11:07:12 2018 +0700 Committer: Matthieu Baechler <matth...@apache.org> Committed: Mon Mar 26 15:30:11 2018 +0200 ---------------------------------------------------------------------- .../elasticsearch/ElasticSearchIntegrationTest.java | 4 +++- .../elasticsearch/json/IndexableMessageTest.java | 3 ++- .../json/MessageToElasticSearchJsonTest.java | 3 ++- mailbox/tika/pom.xml | 4 ++++ .../apache/james/mailbox/tika/TikaTextExtractor.java | 12 +++++++++++- .../james/mailbox/tika/TikaTextExtractorTest.java | 9 +++++---- 6 files changed, 27 insertions(+), 8 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/james-project/blob/356add84/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java index f18225b..ca9de0a 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/ElasticSearchIntegrationTest.java @@ -54,6 +54,7 @@ import org.apache.james.mailbox.tika.TikaConfiguration; import org.apache.james.mailbox.tika.TikaContainer; import 
org.apache.james.mailbox.tika.TikaHttpClientImpl; import org.apache.james.mailbox.tika.TikaTextExtractor; +import org.apache.james.metrics.api.NoopMetricFactory; import org.elasticsearch.client.Client; import org.junit.ClassRule; import org.junit.Rule; @@ -81,7 +82,8 @@ public class ElasticSearchIntegrationTest extends AbstractMessageSearchIndexTest @Override public void setUp() throws Exception { - textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(TikaConfiguration.builder() + textExtractor = new TikaTextExtractor(new NoopMetricFactory(), + new TikaHttpClientImpl(TikaConfiguration.builder() .host(tika.getIp()) .port(tika.getPort()) .timeoutInMillis(tika.getTimeoutInMillis()) http://git-wip-us.apache.org/repos/asf/james-project/blob/356add84/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java index 052d0a2..fe1eff8 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/IndexableMessageTest.java @@ -46,6 +46,7 @@ import org.apache.james.mailbox.tika.TikaConfiguration; import org.apache.james.mailbox.tika.TikaContainer; import org.apache.james.mailbox.tika.TikaHttpClientImpl; import org.apache.james.mailbox.tika.TikaTextExtractor; +import org.apache.james.metrics.api.NoopMetricFactory; import org.junit.Before; import org.junit.ClassRule; import org.junit.Test; @@ -64,7 +65,7 @@ public class IndexableMessageTest { @Before public void setUp() throws Exception { - textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(TikaConfiguration.builder() + textExtractor 
= new TikaTextExtractor(new NoopMetricFactory(), new TikaHttpClientImpl(TikaConfiguration.builder() .host(tika.getIp()) .port(tika.getPort()) .timeoutInMillis(tika.getTimeoutInMillis()) http://git-wip-us.apache.org/repos/asf/james-project/blob/356add84/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java ---------------------------------------------------------------------- diff --git a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java index 6e8e979..f4a4642 100644 --- a/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java +++ b/mailbox/elasticsearch/src/test/java/org/apache/james/mailbox/elasticsearch/json/MessageToElasticSearchJsonTest.java @@ -51,6 +51,7 @@ import org.apache.james.mailbox.tika.TikaConfiguration; import org.apache.james.mailbox.tika.TikaContainer; import org.apache.james.mailbox.tika.TikaHttpClientImpl; import org.apache.james.mailbox.tika.TikaTextExtractor; +import org.apache.james.metrics.api.NoopMetricFactory; import org.apache.james.util.ClassLoaderUtils; import org.junit.Before; import org.junit.ClassRule; @@ -79,7 +80,7 @@ public class MessageToElasticSearchJsonTest { @Before public void setUp() throws Exception { - textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(TikaConfiguration.builder() + textExtractor = new TikaTextExtractor(new NoopMetricFactory(), new TikaHttpClientImpl(TikaConfiguration.builder() .host(tika.getIp()) .port(tika.getPort()) .timeoutInMillis(tika.getTimeoutInMillis()) http://git-wip-us.apache.org/repos/asf/james-project/blob/356add84/mailbox/tika/pom.xml ---------------------------------------------------------------------- diff --git a/mailbox/tika/pom.xml b/mailbox/tika/pom.xml index de3c71c..8d78433 
100644 --- a/mailbox/tika/pom.xml +++ b/mailbox/tika/pom.xml @@ -43,6 +43,10 @@ <scope>test</scope> </dependency> <dependency> + <groupId>${project.groupId}</groupId> + <artifactId>metrics-api</artifactId> + </dependency> + <dependency> <groupId>com.fasterxml.jackson.core</groupId> <artifactId>jackson-databind</artifactId> </dependency> http://git-wip-us.apache.org/repos/asf/james-project/blob/356add84/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java ---------------------------------------------------------------------- diff --git a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java index e38b2a8..955647e 100644 --- a/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java +++ b/mailbox/tika/src/main/java/org/apache/james/mailbox/tika/TikaTextExtractor.java @@ -32,6 +32,7 @@ import javax.inject.Inject; import org.apache.commons.lang3.StringUtils; import org.apache.james.mailbox.extractor.ParsedContent; import org.apache.james.mailbox.extractor.TextExtractor; +import org.apache.james.metrics.api.MetricFactory; import com.fasterxml.jackson.core.JsonParseException; import com.fasterxml.jackson.core.JsonParser; @@ -44,6 +45,7 @@ import com.fasterxml.jackson.databind.JsonNode; import com.fasterxml.jackson.databind.ObjectMapper; import com.fasterxml.jackson.databind.module.SimpleModule; import com.fasterxml.jackson.databind.node.ObjectNode; +import com.github.fge.lambdas.Throwing; import com.github.steveash.guavate.Guavate; import com.google.common.annotations.VisibleForTesting; import com.google.common.base.MoreObjects; @@ -52,11 +54,13 @@ import com.google.common.collect.ImmutableList; public class TikaTextExtractor implements TextExtractor { + private final MetricFactory metricFactory; private final TikaHttpClient tikaHttpClient; private final ObjectMapper objectMapper; @Inject - public 
TikaTextExtractor(TikaHttpClient tikaHttpClient) { + public TikaTextExtractor(MetricFactory metricFactory, TikaHttpClient tikaHttpClient) { + this.metricFactory = metricFactory; this.tikaHttpClient = tikaHttpClient; this.objectMapper = initializeObjectMapper(); } @@ -71,6 +75,12 @@ public class TikaTextExtractor implements TextExtractor { @Override public ParsedContent extractContent(InputStream inputStream, String contentType) throws Exception { + return metricFactory.withMetric("tikaTextExtraction", Throwing.supplier( + () -> performContentExtraction(inputStream, contentType)) + .sneakyThrow()); + } + + public ParsedContent performContentExtraction(InputStream inputStream, String contentType) throws IOException { ContentAndMetadata contentAndMetadata = convert(tikaHttpClient.recursiveMetaDataAsJson(inputStream, contentType)); return new ParsedContent(contentAndMetadata.getContent(), contentAndMetadata.getMetadata()); } http://git-wip-us.apache.org/repos/asf/james-project/blob/356add84/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java ---------------------------------------------------------------------- diff --git a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java index e6d4f0d..455a275 100644 --- a/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java +++ b/mailbox/tika/src/test/java/org/apache/james/mailbox/tika/TikaTextExtractorTest.java @@ -32,6 +32,7 @@ import org.apache.commons.io.IOUtils; import org.apache.james.mailbox.extractor.ParsedContent; import org.apache.james.mailbox.extractor.TextExtractor; import org.apache.james.mailbox.tika.TikaTextExtractor.ContentAndMetadataDeserializer; +import org.apache.james.metrics.api.NoopMetricFactory; import org.junit.Before; import org.junit.ClassRule; import org.junit.Rule; @@ -54,7 +55,7 @@ public class TikaTextExtractorTest { 
@Before public void setUp() throws Exception { - textExtractor = new TikaTextExtractor(new TikaHttpClientImpl(TikaConfiguration.builder() + textExtractor = new TikaTextExtractor(new NoopMetricFactory(), new TikaHttpClientImpl(TikaConfiguration.builder() .host(tika.getIp()) .port(tika.getPort()) .timeoutInMillis(tika.getTimeoutInMillis()) @@ -156,7 +157,7 @@ public class TikaTextExtractorTest { @Test public void deserializerShouldNotThrowWhenMoreThanOneNode() throws Exception { TikaTextExtractor textExtractor = new TikaTextExtractor( - (inputStream, contentType) -> new ByteArrayInputStream(("[{\"X-TIKA:content\": \"This is an awesome LibreOffice document !\"}, " + + new NoopMetricFactory(), (inputStream, contentType) -> new ByteArrayInputStream(("[{\"X-TIKA:content\": \"This is an awesome LibreOffice document !\"}, " + "{\"Chroma BlackIsZero\": \"true\"}]").getBytes(StandardCharsets.UTF_8))); InputStream inputStream = null; @@ -167,7 +168,7 @@ public class TikaTextExtractorTest { public void deserializerShouldTakeFirstNodeWhenSeveral() throws Exception { String expectedExtractedContent = "content A"; TikaTextExtractor textExtractor = new TikaTextExtractor( - (inputStream, contentType) -> new ByteArrayInputStream(("[{\"X-TIKA:content\": \"" + expectedExtractedContent + "\"}, " + + new NoopMetricFactory(), (inputStream, contentType) -> new ByteArrayInputStream(("[{\"X-TIKA:content\": \"" + expectedExtractedContent + "\"}, " + "{\"X-TIKA:content\": \"content B\"}]").getBytes(StandardCharsets.UTF_8))); InputStream inputStream = null; @@ -182,7 +183,7 @@ public class TikaTextExtractorTest { expectedException.expectMessage("The element should be a Json object"); TikaTextExtractor textExtractor = new TikaTextExtractor( - (inputStream, contentType) -> new ByteArrayInputStream("[\"value1\"]".getBytes(StandardCharsets.UTF_8))); + new NoopMetricFactory(), (inputStream, contentType) -> new ByteArrayInputStream("[\"value1\"]".getBytes(StandardCharsets.UTF_8))); InputStream 
inputStream = null; textExtractor.extractContent(inputStream, "text/plain"); --------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscribe@james.apache.org For additional commands, e-mail: server-dev-help@james.apache.org