This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git

commit 9b036e11a2febb4e59f684d46011aad57c807f6f
Author: Benoit TELLIER <[email protected]>
AuthorDate: Mon Aug 26 16:21:02 2024 +0200

    [FIX] Prevent HtmlTextExtractor from generating asymmetric outputs
---
 .../james/jmap/utils/JsoupHtmlTextExtractor.java   | 34 +++++++++++++++++++++-
 .../org/apache/james/jmap/core/Capability.scala    |  4 ++-
 .../jmap/utils/JsoupHtmlTextExtractorTest.java     | 10 +++++++
 3 files changed, 46 insertions(+), 2 deletions(-)

diff --git 
a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
 
b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
index 14f10a0414..f1f60da203 100644
--- 
a/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
+++ 
b/server/protocols/jmap-rfc-8621/src/main/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractor.java
@@ -26,7 +26,10 @@ import java.util.function.Predicate;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
+import jakarta.inject.Inject;
+
 import org.apache.commons.lang3.StringUtils;
+import org.apache.james.jmap.core.JmapRfc8621Configuration;
 import org.apache.james.util.html.HtmlTextExtractor;
 import org.apache.james.util.streams.Iterators;
 import org.jsoup.Jsoup;
@@ -40,8 +43,24 @@ import org.slf4j.LoggerFactory;
 import com.google.common.base.Strings;
 
 public class JsoupHtmlTextExtractor implements HtmlTextExtractor {
-
     private static final Logger LOGGER = 
LoggerFactory.getLogger(JsoupHtmlTextExtractor.class);
+
+    private static class Context {
+        private final long limit;
+        private long outputSize = 0;
+
+        private Context(JmapRfc8621Configuration configuration) {
+            this.limit = configuration.maxSizeAttachmentsPerEmail().asLong();
+        }
+
+        void add(String s) {
+            outputSize += s.length();
+            if (outputSize > limit) {
+                throw new IllegalStateException("text/plain rendering exceeds 
message limit");
+            }
+        }
+    }
+
     public static final String BR_TAG = "br";
     public static final String UL_TAG = "ul";
     public static final String OL_TAG = "ol";
@@ -51,6 +70,17 @@ public class JsoupHtmlTextExtractor implements 
HtmlTextExtractor {
     public static final String ALT_TAG = "alt";
     public static final int INITIAL_LIST_NESTED_LEVEL = 0;
 
+    private final JmapRfc8621Configuration configuration;
+
+    @Inject
+    public JsoupHtmlTextExtractor(JmapRfc8621Configuration configuration) {
+        this.configuration = configuration;
+    }
+
+    public JsoupHtmlTextExtractor() {
+        this.configuration = 
JmapRfc8621Configuration.LOCALHOST_CONFIGURATION();
+    }
+
     @Override
     public String toPlainText(String html) {
         try {
@@ -58,8 +88,10 @@ public class JsoupHtmlTextExtractor implements 
HtmlTextExtractor {
 
             Element body = 
Optional.ofNullable(document.body()).orElse(document);
 
+            Context context = new Context(configuration);
             return flatten(body)
                 .map(this::convertNodeToText)
+                .peek(context::add)
                 .collect(Collectors.joining());
         } catch (Exception e) {
             LOGGER.warn("Failed extracting text from html", e);
diff --git 
a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala
 
b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala
index cf99b0832a..407961b067 100644
--- 
a/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala
+++ 
b/server/protocols/jmap-rfc-8621/src/main/scala/org/apache/james/jmap/core/Capability.scala
@@ -239,7 +239,9 @@ object MaxSizeAttachmentsPerEmail {
 case class MaxMailboxesPerEmail(value: Option[UnsignedInt])
 case class MaxMailboxDepth(value: Option[UnsignedInt])
 case class MaxSizeMailboxName(value: UnsignedInt)
-case class MaxSizeAttachmentsPerEmail(value: UnsignedInt)
+case class MaxSizeAttachmentsPerEmail(value: UnsignedInt) {
+  def asLong()= value.value
+}
 
 object JmapUploadQuotaLimit {
   def of(size: Size): Try[JmapUploadQuotaLimit] = 
refined.refineV[UnsignedIntConstraint](size.asBytes()) match {
diff --git 
a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
 
b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
index 4b6da3eae0..9f113c303b 100644
--- 
a/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
+++ 
b/server/protocols/jmap-rfc-8621/src/test/java/org/apache/james/jmap/utils/JsoupHtmlTextExtractorTest.java
@@ -65,6 +65,16 @@ public class JsoupHtmlTextExtractorTest {
         
assertThat(textExtractor.toPlainText(html)).isEqualTo(expectedPlainText);
     }
 
+    @Test
+    public void asymmetricOutputShouldNotThrowOOM() {
+        int count = 20000;
+        String html = "<ul><li>a</li><li>a</li>".repeat(count) + 
"</ul>".repeat(count);
+
+        // Computation aborted: the extractor is expected to return the input HTML unchanged
+        assertThat(textExtractor.toPlainText(html))
+            .isEqualTo(html);
+    }
+
     @Test
     public void deeplyNestedHtmlShouldNotThrowStackOverflow() {
         final int count = 2048;


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to