This is an automated email from the ASF dual-hosted git repository.

btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git


The following commit(s) were added to refs/heads/master by this push:
     new d60ee4638b [FIX] Run hashing on separated thread only if relevant - 
chatGPT suggestion (#2553)
d60ee4638b is described below

commit d60ee4638b61e2dd0777f98c202a2e8ab8a4b76d
Author: Benoit TELLIER <[email protected]>
AuthorDate: Sat Dec 7 14:19:54 2024 +0100

    [FIX] Run hashing on separated thread only if relevant - chatGPT suggestion 
(#2553)
---
 docs/modules/servers/partials/configure/jvm.adoc      | 12 +++++++++++-
 .../sample-configuration/jvm.properties               |  7 ++++++-
 .../blob/deduplication/DeDuplicationBlobStore.scala   | 19 ++++++++++++++-----
 3 files changed, 31 insertions(+), 7 deletions(-)

diff --git a/docs/modules/servers/partials/configure/jvm.adoc 
b/docs/modules/servers/partials/configure/jvm.adoc
index 15011dc388..f6b5ce0ef2 100644
--- a/docs/modules/servers/partials/configure/jvm.adoc
+++ b/docs/modules/servers/partials/configure/jvm.adoc
@@ -153,4 +153,14 @@ Ex in `jvm.properties`
 ----
 james.relaxed.mailbox.name.validation=true
 ----
-To relax validating `\*` and `%` characters in the mailbox name. Be careful as 
`%` and `*` are ambiguous for the LIST / LSUB commands that interpret those as 
wildcard thus returning all mailboxes matching the pattern.
\ No newline at end of file
+To relax validating `\*` and `%` characters in the mailbox name. Be careful 
as `%` and `*` are ambiguous for the LIST / LSUB commands that interpret those 
as wildcard thus returning all mailboxes matching the pattern.
+
+== Customizing blob deduplication settings
+
+----
+# Number of octets from which hashing is done off the IO threads in the 
deduplicating blob store
+james.deduplicating.blobstore.thread.switch.threshold=32768
+
+# Number of octets from which streams are buffered to files rather than to memory
+james.deduplicating.blobstore.file.threshold=10240
+----
\ No newline at end of file
diff --git a/server/apps/distributed-app/sample-configuration/jvm.properties 
b/server/apps/distributed-app/sample-configuration/jvm.properties
index 9365a7fe91..a5e6b4bc58 100644
--- a/server/apps/distributed-app/sample-configuration/jvm.properties
+++ b/server/apps/distributed-app/sample-configuration/jvm.properties
@@ -88,4 +88,9 @@ jmx.remote.x.mlet.allow.getMBeansFromURL=false
 
 # Relax validating `*` and `%` characters in the mailbox name. Defaults to 
false.
 # Be careful turning on this as `%` and `*` are ambiguous for the LIST / LSUB 
commands that interpret those as wildcard thus returning all mailboxes matching 
the pattern.
-#james.relaxed.mailbox.name.validation=true
\ No newline at end of file
+#james.relaxed.mailbox.name.validation=true
+
+# Number of octets from which hashing is done off the IO threads in the 
deduplicating blob store
+# james.deduplicating.blobstore.thread.switch.threshold=32768
+# Number of octets from which streams are buffered to files rather than to memory
+# james.deduplicating.blobstore.file.threshold=10240
\ No newline at end of file
diff --git 
a/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
 
b/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
index dda70bf4c3..92dfb7f005 100644
--- 
a/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
+++ 
b/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
@@ -26,6 +26,7 @@ import jakarta.inject.{Inject, Named}
 import org.apache.commons.io.IOUtils
 import org.apache.james.blob.api.BlobStore.BlobIdProvider
 import org.apache.james.blob.api.{BlobId, BlobStore, BlobStoreDAO, BucketName}
+import 
org.apache.james.server.blob.deduplication.DeDuplicationBlobStore.THREAD_SWITCH_THRESHOLD
 import org.reactivestreams.Publisher
 import reactor.core.publisher.{Flux, Mono}
 import reactor.core.scala.publisher.SMono
@@ -38,7 +39,8 @@ import scala.compat.java8.FunctionConverters._
 
 object DeDuplicationBlobStore {
   val LAZY_RESOURCE_CLEANUP = false
-  val FILE_THRESHOLD = 10000
+  val FILE_THRESHOLD = 
Integer.parseInt(System.getProperty("james.deduplicating.blobstore.file.threshold",
 "10240"))
+  val THREAD_SWITCH_THRESHOLD = 
Integer.parseInt(System.getProperty("james.deduplicating.blobstore.thread.switch.threshold",
 "32768"));
 
   private def baseEncodingFrom(encodingType: String): BaseEncoding = 
encodingType match {
     case "base16" =>
@@ -124,12 +126,19 @@ class DeDuplicationBlobStore @Inject()(blobStoreDAO: 
BlobStoreDAO,
       .map(blobIdFactory.of)
       .map(blobId => Tuples.of(blobId, data))
 
-  private def withBlobIdFromArray: BlobIdProvider[Array[Byte]] = data =>
-    SMono.fromCallable(() => {
+  private def withBlobIdFromArray: BlobIdProvider[Array[Byte]] = data => {
+    if (data.length < THREAD_SWITCH_THRESHOLD) { // below threshold: hash eagerly on the calling thread, no thread hop
       val code = Hashing.sha256.hashBytes(data)
       val blobId = blobIdFactory.of(base64(code))
+      Mono.just(Tuples.of(blobId, data))
+    } else { // at or above threshold: defer the hash until subscription
+      SMono.fromCallable(() => {
+        val code = Hashing.sha256.hashBytes(data)
+        val blobId = blobIdFactory.of(base64(code))
+        Tuples.of(blobId, data)
+      }).subscribeOn(Schedulers.parallel()) // run the deferred hash on the parallel scheduler, not the subscriber's thread
+    }
+  }
 
   private def base64(hashCode: HashCode) = {
     val bytes = hashCode.asBytes


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to