This is an automated email from the ASF dual-hosted git repository.
btellier pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/james-project.git
The following commit(s) were added to refs/heads/master by this push:
new d60ee4638b [FIX] Run hashing on separated thread only if relevant -
chatGPT suggestion (#2553)
d60ee4638b is described below
commit d60ee4638b61e2dd0777f98c202a2e8ab8a4b76d
Author: Benoit TELLIER <[email protected]>
AuthorDate: Sat Dec 7 14:19:54 2024 +0100
[FIX] Run hashing on separated thread only if relevant - chatGPT suggestion
(#2553)
---
docs/modules/servers/partials/configure/jvm.adoc | 12 +++++++++++-
.../sample-configuration/jvm.properties | 7 ++++++-
.../blob/deduplication/DeDuplicationBlobStore.scala | 19 ++++++++++++++-----
3 files changed, 31 insertions(+), 7 deletions(-)
diff --git a/docs/modules/servers/partials/configure/jvm.adoc
b/docs/modules/servers/partials/configure/jvm.adoc
index 15011dc388..f6b5ce0ef2 100644
--- a/docs/modules/servers/partials/configure/jvm.adoc
+++ b/docs/modules/servers/partials/configure/jvm.adoc
@@ -153,4 +153,14 @@ Ex in `jvm.properties`
----
james.relaxed.mailbox.name.validation=true
----
-To relax validating `\*` and `%` characters in the mailbox name. Be careful as
`%` and `*` are ambiguous for the LIST / LSUB commands that interpret those as
wildcard thus returning all mailboxes matching the pattern.
\ No newline at end of file
+To relax validating `\*` and `%` characters in the mailbox name. Be careful
as `%` and `*` are ambiguous for the LIST / LSUB commands that interpret those
as wildcard thus returning all mailboxes matching the pattern.
+
+== Customizing blob deduplication settings
+
+----
+# Count of octet from which hashing shall be done out of the IO threads in
deduplicating blob store
+james.deduplicating.blobstore.thread.switch.threshold=32768
+
+# Count of octet from which streams are buffered to files and not to memory
+james.deduplicating.blobstore.file.threshold=10240
+----
\ No newline at end of file
diff --git a/server/apps/distributed-app/sample-configuration/jvm.properties
b/server/apps/distributed-app/sample-configuration/jvm.properties
index 9365a7fe91..a5e6b4bc58 100644
--- a/server/apps/distributed-app/sample-configuration/jvm.properties
+++ b/server/apps/distributed-app/sample-configuration/jvm.properties
@@ -88,4 +88,9 @@ jmx.remote.x.mlet.allow.getMBeansFromURL=false
# Relax validating `*` and `%` characters in the mailbox name. Defaults to
false.
# Be careful turning on this as `%` and `*` are ambiguous for the LIST / LSUB
commands that interpret those as wildcard thus returning all mailboxes matching
the pattern.
-#james.relaxed.mailbox.name.validation=true
\ No newline at end of file
+#james.relaxed.mailbox.name.validation=true
+
+# Count of octet from which hashing shall be done out of the IO threads in
deduplicating blob store
+# james.deduplicating.blobstore.thread.switch.threshold=32768
+# Count of octet from which streams are buffered to files and not to memory
+# james.deduplicating.blobstore.file.threshold=10240
\ No newline at end of file
diff --git
a/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
b/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
index dda70bf4c3..92dfb7f005 100644
---
a/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
+++
b/server/blob/blob-storage-strategy/src/main/scala/org/apache/james/server/blob/deduplication/DeDuplicationBlobStore.scala
@@ -26,6 +26,7 @@ import jakarta.inject.{Inject, Named}
import org.apache.commons.io.IOUtils
import org.apache.james.blob.api.BlobStore.BlobIdProvider
import org.apache.james.blob.api.{BlobId, BlobStore, BlobStoreDAO, BucketName}
+import
org.apache.james.server.blob.deduplication.DeDuplicationBlobStore.THREAD_SWITCH_THRESHOLD
import org.reactivestreams.Publisher
import reactor.core.publisher.{Flux, Mono}
import reactor.core.scala.publisher.SMono
@@ -38,7 +39,8 @@ import scala.compat.java8.FunctionConverters._
object DeDuplicationBlobStore {
val LAZY_RESOURCE_CLEANUP = false
- val FILE_THRESHOLD = 10000
+ val FILE_THRESHOLD =
Integer.parseInt(System.getProperty("james.deduplicating.blobstore.file.threshold",
"10240"))
+ val THREAD_SWITCH_THRESHOLD =
Integer.parseInt(System.getProperty("james.deduplicating.blobstore.thread.switch.threshold",
"32768"));
private def baseEncodingFrom(encodingType: String): BaseEncoding =
encodingType match {
case "base16" =>
@@ -124,12 +126,19 @@ class DeDuplicationBlobStore @Inject()(blobStoreDAO:
BlobStoreDAO,
.map(blobIdFactory.of)
.map(blobId => Tuples.of(blobId, data))
- private def withBlobIdFromArray: BlobIdProvider[Array[Byte]] = data =>
- SMono.fromCallable(() => {
+ private def withBlobIdFromArray: BlobIdProvider[Array[Byte]] = data => {
+ if (data.length < THREAD_SWITCH_THRESHOLD) { // below the threshold: hash inline, no thread switch
val code = Hashing.sha256.hashBytes(data)
val blobId = blobIdFactory.of(base64(code))
- Tuples.of(blobId, data)
- }).subscribeOn(Schedulers.parallel())
+ Mono.just(Tuples.of(blobId, data))
+ } else {
+ SMono.fromCallable(() => {
+ val code = Hashing.sha256.hashBytes(data)
+ val blobId = blobIdFactory.of(base64(code))
+ Tuples.of(blobId, data)
+ }).subscribeOn(Schedulers.parallel()) // at or above the threshold: hash on the parallel scheduler, off the subscribing thread
+ }
+ }
private def base64(hashCode: HashCode) = {
val bytes = hashCode.asBytes
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]