This is an automated email from the ASF dual-hosted git repository. btellier pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 5611a3f9870251049222925d6318a52e9ad375ed Author: Benoit TELLIER <[email protected]> AuthorDate: Mon Dec 2 23:04:04 2024 +0100 [FIX] Improve S3 MinIO support with MinIOGenerationAwareBlobId MinIO compatibility The idea is to leverage hashing random distribution in order to split blobs in several subfolders. This prevent the creation of a single folder with all tmail blobs in it, which with MinIO may lead to the impossibility to browse objects. The introduced MinIOGenerationAwareBlobId is intended to be retro-compatible with existing GenerationAwareBlobId (not understanding its generations but still parsing those blobs as is). It splits the wrapped blobId into subfolders upon initial instanciations. --- .../deduplication/MinIOGenerationAwareBlobId.java | 169 +++++++++++++ .../MinIOGenerationAwareBlobIdTest.java | 281 +++++++++++++++++++++ .../blobstore/BlobDeduplicationGCModule.java | 11 +- 3 files changed, 460 insertions(+), 1 deletion(-) diff --git a/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/MinIOGenerationAwareBlobId.java b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/MinIOGenerationAwareBlobId.java new file mode 100644 index 0000000000..ac0fd4b8cb --- /dev/null +++ b/server/blob/blob-storage-strategy/src/main/java/org/apache/james/server/blob/deduplication/MinIOGenerationAwareBlobId.java @@ -0,0 +1,169 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.server.blob.deduplication; + +import static org.apache.james.server.blob.deduplication.GenerationAwareBlobId.NO_FAMILY; +import static org.apache.james.server.blob.deduplication.GenerationAwareBlobId.computeGeneration; + +import java.time.Clock; +import java.time.Instant; +import java.util.Objects; + +import jakarta.inject.Inject; + +import org.apache.james.blob.api.BlobId; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.base.MoreObjects; +import com.google.common.base.Preconditions; + +public class MinIOGenerationAwareBlobId implements BlobId, GenerationAware { + public static class Factory implements BlobId.Factory { + public static final int NO_FAMILY = 0; + public static final int NO_GENERATION = 0; + + private final Clock clock; + private final GenerationAwareBlobId.Configuration configuration; + private final BlobId.Factory delegate; + + @Inject + public Factory(Clock clock, GenerationAwareBlobId.Configuration configuration, BlobId.Factory delegate) { + this.clock = clock; + this.configuration = configuration; + this.delegate = delegate; + } + + @Override + public BlobId parse(String id) { + int separatorIndex1 = id.indexOf('/'); + if (separatorIndex1 == -1 || separatorIndex1 == id.length() - 1) { + return decorateWithoutGeneration(id); + } + int separatorIndex2 = id.indexOf('/', separatorIndex1 + 1); + if (separatorIndex2 == -1 || separatorIndex2 == id.length() - 1) { + return decorateWithoutGeneration(id); + } + try { + int family = Integer.parseInt(id.substring(0, separatorIndex1)); + int generation = Integer.parseInt(id.substring(separatorIndex1 + 1, separatorIndex2)); + if (family < 0 || generation < 0) { + return decorateWithoutGeneration(id); + } + BlobId wrapped = delegate.parse(id.substring(separatorIndex2 + 1)); + return new MinIOGenerationAwareBlobId(generation, family, wrapped); + } catch (NumberFormatException e) { + return delegate.parse(id.substring(separatorIndex2 + 1)); + } + } + + private static String injectFoldersInBlobId(String blobIdPart) { + int folderDepthToCreate = 4; + if (blobIdPart.length() > folderDepthToCreate) { + return blobIdPart.charAt(0) + "/" + + blobIdPart.charAt(1) + "/" + + blobIdPart.charAt(2) + "/" + + blobIdPart.charAt(3) + "/" + + blobIdPart.substring(4); + } + return blobIdPart; + } + + private GenerationAwareBlobId decorateWithoutGeneration(String id) { + return new GenerationAwareBlobId(NO_GENERATION, NO_FAMILY, delegate.parse(id)); + } + + @Override + public BlobId of(String id) { + return decorate(delegate.of(injectFoldersInBlobId(id))); + } + + private MinIOGenerationAwareBlobId decorate(BlobId blobId) { + return new MinIOGenerationAwareBlobId(computeGeneration(configuration, clock.instant()), + configuration.getFamily(), + blobId); + } + } + + private final long generation; + private final int family; + private final BlobId delegate; + + @VisibleForTesting + MinIOGenerationAwareBlobId(long generation, int family, BlobId delegate) { + Preconditions.checkArgument(generation >= 0, "'generation' should not be negative"); + Preconditions.checkArgument(family >= 0, "'family' should not be negative"); + this.generation = generation; + this.family = family; + this.delegate = delegate; + } + + @Override + public String asString() { + if (family == NO_FAMILY) { + return delegate.asString(); + } + return family + "/" + generation + "/" + delegate.asString(); + } + + @Override + public boolean inActiveGeneration(GenerationAwareBlobId.Configuration configuration, Instant now) { + return configuration.getFamily() == this.family && + generation + 1 >= computeGeneration(configuration, now); + } + + @VisibleForTesting + long getGeneration() { + return generation; + } + + @VisibleForTesting + int getFamily() { + return family; + } + + @VisibleForTesting + BlobId getDelegate() { + return delegate; + } + + @Override + public final boolean equals(Object obj) { + if (obj instanceof MinIOGenerationAwareBlobId other) { + return Objects.equals(generation, other.generation) + && Objects.equals(delegate, other.delegate) + && Objects.equals(family, other.family); + } + return false; + } + + @Override + public final int hashCode() { + return Objects.hash(generation, family, delegate); + } + + @Override + public String toString() { + return MoreObjects + .toStringHelper(this) + .add("generation", generation) + .add("delegate", delegate) + .toString(); + } +} diff --git a/server/blob/blob-storage-strategy/src/test/java/org/apache/james/server/blob/deduplication/MinIOGenerationAwareBlobIdTest.java b/server/blob/blob-storage-strategy/src/test/java/org/apache/james/server/blob/deduplication/MinIOGenerationAwareBlobIdTest.java new file mode 100644 index 0000000000..8dd11b9182 --- /dev/null +++ b/server/blob/blob-storage-strategy/src/test/java/org/apache/james/server/blob/deduplication/MinIOGenerationAwareBlobIdTest.java @@ -0,0 +1,281 @@ +/**************************************************************** + * Licensed to the Apache Software Foundation (ASF) under one * + * or more contributor license agreements. See the NOTICE file * + * distributed with this work for additional information * + * regarding copyright ownership. The ASF licenses this file * + * to you under the Apache License, Version 2.0 (the * + * "License"); you may not use this file except in compliance * + * with the License. You may obtain a copy of the License at * + * * + * http://www.apache.org/licenses/LICENSE-2.0 * + * * + * Unless required by applicable law or agreed to in writing, * + * software distributed under the License is distributed on an * + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY * + * KIND, either express or implied. See the License for the * + * specific language governing permissions and limitations * + * under the License. * + ****************************************************************/ + +package org.apache.james.server.blob.deduplication; + +import static org.assertj.core.api.Assertions.assertThat; +import static org.assertj.core.api.Assertions.assertThatCode; +import static org.assertj.core.api.Assertions.assertThatThrownBy; + +import java.time.Instant; +import java.time.temporal.ChronoUnit; +import java.util.UUID; + +import org.apache.james.blob.api.BlobId; +import org.apache.james.blob.api.PlainBlobId; +import org.apache.james.utils.UpdatableTickingClock; +import org.assertj.core.api.SoftAssertions; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Nested; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.ValueSource; + +import nl.jqno.equalsverifier.EqualsVerifier; + +class MinIOGenerationAwareBlobIdTest { + private static final Instant NOW = Instant.parse("2021-08-19T10:15:30.00Z"); + + private BlobId.Factory delegate; + private MinIOGenerationAwareBlobId.Factory testee; + + @BeforeEach + void setUp() { + delegate = new PlainBlobId.Factory(); + UpdatableTickingClock clock = new UpdatableTickingClock(NOW); + testee = new MinIOGenerationAwareBlobId.Factory(clock, GenerationAwareBlobId.Configuration.DEFAULT, delegate); + } + + @Nested + class BlobIdGeneration { + @Test + void ofShouldGenerateABlobIdOfTheRightGeneration() { + String key = "4ccb692e-3efb-40e9-8876-4ecfd51ffd4d"; + MinIOGenerationAwareBlobId actual = (MinIOGenerationAwareBlobId) testee.of(key); + + SoftAssertions.assertSoftly(soft -> { + soft.assertThat(actual.getFamily()).isEqualTo(GenerationAwareBlobId.Configuration.DEFAULT.getFamily()); + soft.assertThat(actual.getGeneration()).isEqualTo(628L); + soft.assertThat(actual.getDelegate()).isEqualTo(delegate.of("4/c/c/b/692e-3efb-40e9-8876-4ecfd51ffd4d")); + }); + } + } + + @Nested + class BlobIdParsing { + @Test + void asStringValueShouldBeParsable() { + String key = UUID.randomUUID().toString(); + BlobId minioBlobId = testee.of(key); + String minioBlobIdAsString = minioBlobId.asString(); + + BlobId blobId = testee.parse(minioBlobIdAsString); + assertThat(blobId).isEqualTo(minioBlobId); + } + + @Test + void previousBlobIdsShouldBeParsable() { + String blobIdString = delegate.of("abcdef").asString(); + + BlobId actual = testee.parse(blobIdString); + assertThat(actual) + .isInstanceOfSatisfying(GenerationAwareBlobId.class, actualBlobId -> { + SoftAssertions.assertSoftly(soft -> { + soft.assertThat(actualBlobId.getFamily()).isEqualTo(0); + soft.assertThat(actualBlobId.getGeneration()).isEqualTo(0L); + soft.assertThat(actualBlobId.getDelegate().asString()).isEqualTo(blobIdString); + }); + }); + } + + @Test + void noFamilyShouldBeParsable() { + String originalBlobId = "abcdef"; + String blobIdString = "0/0/" + delegate.of(originalBlobId).asString(); + + BlobId actual = testee.parse(blobIdString); + assertThat(actual) + .isInstanceOfSatisfying(MinIOGenerationAwareBlobId.class, actualBlobId -> { + SoftAssertions.assertSoftly(soft -> { + soft.assertThat(actualBlobId.getFamily()).isEqualTo(0); + soft.assertThat(actualBlobId.getGeneration()).isEqualTo(0L); + soft.assertThat(actualBlobId.getDelegate()).isEqualTo(delegate.of(originalBlobId)); + }); + }); + } + + @Test + void generationBlobIdShouldBeParsable() { + String originalBlobId = "abcdef"; + String blobIdString = "12/126/" + delegate.of(originalBlobId).asString(); + + BlobId actual = testee.parse(blobIdString); + assertThat(actual) + .isInstanceOfSatisfying(MinIOGenerationAwareBlobId.class, actualBlobId -> { + SoftAssertions.assertSoftly(soft -> { + soft.assertThat(actualBlobId.getFamily()).isEqualTo(12); + soft.assertThat(actualBlobId.getGeneration()).isEqualTo(126L); + soft.assertThat(actualBlobId.getDelegate()).isEqualTo(delegate.of(originalBlobId)); + }); + }); + } + + @Test + void wrappedBlobIdCanContainSeparator() { + String blobIdString = "12/126/ab/c"; + + BlobId actual = testee.parse(blobIdString); + assertThat(actual) + .isInstanceOfSatisfying(MinIOGenerationAwareBlobId.class, actualBlobId -> { + SoftAssertions.assertSoftly(soft -> { + soft.assertThat(actualBlobId.getFamily()).isEqualTo(12); + soft.assertThat(actualBlobId.getGeneration()).isEqualTo(126L); + soft.assertThat(actualBlobId.getDelegate()).isEqualTo(delegate.of("ab/c")); + }); + }); + } + + @ParameterizedTest + @ValueSource(strings = { + "abcdefgh", + "abcdefgh/", + "1/abcdefgh", + "1/2", + "1/2/", + "/abcdefgh" + }) + void fromShouldFallbackWhenNotApplicable(String blobIdString) { + BlobId actual = testee.parse(blobIdString); + assertThat(actual) + .isInstanceOfSatisfying(GenerationAwareBlobId.class, actualBlobId -> { + SoftAssertions.assertSoftly(soft -> { + soft.assertThat(actualBlobId.getFamily()).isEqualTo(0); + soft.assertThat(actualBlobId.getGeneration()).isEqualTo(0L); + soft.assertThat(actualBlobId.getDelegate()).isEqualTo(delegate.parse(blobIdString)); + }); + }); + } + + @Nested + class Failures { + @Test + void emptyShouldFail() { + assertThatThrownBy(() -> testee.parse("")) + .isInstanceOf(IllegalArgumentException.class); + } + + @Test + void nullShouldFailShouldFail() { + assertThatThrownBy(() -> testee.parse(null)) + .isInstanceOf(NullPointerException.class); + } + + @ParameterizedTest + @ValueSource(strings = { + "invalid/2/abcdefgh", + "1/invalid/abcdefgh", + "1//abcdefgh", + "//abcdefgh", + "/1/abcdefgh", + "-1/2/abcdefgh", + "1/-1/abcdefgh", + }) + void fromShouldFallbackWhenNotApplicable(String blobIdString) { + assertThatCode(() -> testee.parse(blobIdString)) + .doesNotThrowAnyException(); + } + } + } + + @Nested + class BlobIdPojo { + + @Test + void shouldMatchPojoContract() { + EqualsVerifier.forClass(MinIOGenerationAwareBlobId.class) + .verify(); + } + + @Test + void asStringShouldIntegrateFamilyAndGeneration() { + BlobId blobId = testee.of("36825033-d835-4490-9f5a-eef120b1e85c"); + + assertThat(blobId.asString()).isEqualTo("1/628/3/6/8/2/5033-d835-4490-9f5a-eef120b1e85c"); + } + + @Test + void asStringShouldReturnDelegateForZeroFamily() { + BlobId blobId = new MinIOGenerationAwareBlobId(0, 0, delegate.parse("36825033-d835-4490-9f5a-eef120b1e85c")); + + assertThat(blobId.asString()).isEqualTo("36825033-d835-4490-9f5a-eef120b1e85c"); + } + + @ParameterizedTest + @ValueSource(strings = { + "1/2/a/b/c/d/efgh", + "abc", + "1/2/abc" + }) + void asStringShouldRevertFromString(String blobIdString) { + BlobId blobId = testee.parse(blobIdString); + + assertThat(blobId.asString()).isEqualTo(blobIdString); + } + + @Test + void noGenerationShouldNeverBeInActiveGeneration() { + MinIOGenerationAwareBlobId blobId = new MinIOGenerationAwareBlobId(0, 0, delegate.parse("36825033-d835-4490-9f5a-eef120b1e85c")); + + assertThat(blobId.inActiveGeneration(GenerationAwareBlobId.Configuration.DEFAULT, NOW)).isFalse(); + } + + @Test + void inActiveGenerationShouldReturnTrueWhenSameDate() { + BlobId blobId = testee.of("36825033-d835-4490-9f5a-eef120b1e85c"); + + assertThat(blobId) + .isInstanceOfSatisfying(MinIOGenerationAwareBlobId.class, actualBlobId -> + assertThat(actualBlobId.inActiveGeneration(GenerationAwareBlobId.Configuration.DEFAULT, NOW)).isTrue()); + } + + @Test + void inActiveGenerationShouldReturnTrueWhenInTheFuture() { + BlobId blobId = testee.of("36825033-d835-4490-9f5a-eef120b1e85c"); + + assertThat(blobId) + .isInstanceOfSatisfying(MinIOGenerationAwareBlobId.class, actualBlobId -> + assertThat(actualBlobId.inActiveGeneration(GenerationAwareBlobId.Configuration.DEFAULT, NOW.minus(60, ChronoUnit.DAYS))).isTrue()); + } + + @Test + void inActiveGenerationShouldReturnTrueForAtLeastOneMoreMonth() { + BlobId blobId = testee.of("36825033-d835-4490-9f5a-eef120b1e85c"); + assertThat(blobId) + .isInstanceOfSatisfying(MinIOGenerationAwareBlobId.class, actualBlobId -> + assertThat(actualBlobId.inActiveGeneration(GenerationAwareBlobId.Configuration.DEFAULT, NOW.plus(30, ChronoUnit.DAYS))).isTrue()); + } + + @Test + void inActiveGenerationShouldReturnFalseAfterTwoMonth() { + BlobId blobId = testee.of("36825033-d835-4490-9f5a-eef120b1e85c"); + + assertThat(blobId) + .isInstanceOfSatisfying(MinIOGenerationAwareBlobId.class, actualBlobId -> + assertThat(actualBlobId.inActiveGeneration(GenerationAwareBlobId.Configuration.DEFAULT, NOW.plus(60, ChronoUnit.DAYS))).isFalse()); + } + + @Test + void inActiveGenerationShouldReturnFalseWhenDistinctFamily() { + MinIOGenerationAwareBlobId blobId = new MinIOGenerationAwareBlobId(628L, 2, delegate.of("36825033-d835-4490-9f5a-eef120b1e85c")); + + assertThat(blobId.inActiveGeneration(GenerationAwareBlobId.Configuration.DEFAULT, NOW.plus(60, ChronoUnit.DAYS))) + .isFalse(); + } + } +} \ No newline at end of file diff --git a/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java b/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java index 07c23818e0..073c1aa91f 100644 --- a/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java +++ b/server/container/guice/blob/deduplication-gc/src/main/java/org/apache/james/modules/blobstore/BlobDeduplicationGCModule.java @@ -21,6 +21,7 @@ package org.apache.james.modules.blobstore; import java.io.FileNotFoundException; import java.time.Clock; +import java.util.Optional; import java.util.Set; import org.apache.commons.configuration2.Configuration; @@ -35,6 +36,7 @@ import org.apache.james.modules.blobstore.server.BlobRoutesModules; import org.apache.james.server.blob.deduplication.BlobGCTaskAdditionalInformationDTO; import org.apache.james.server.blob.deduplication.BlobGCTaskDTO; import org.apache.james.server.blob.deduplication.GenerationAwareBlobId; +import org.apache.james.server.blob.deduplication.MinIOGenerationAwareBlobId; import org.apache.james.server.task.json.dto.AdditionalInformationDTO; import org.apache.james.server.task.json.dto.AdditionalInformationDTOModule; import org.apache.james.server.task.json.dto.TaskDTO; @@ -68,7 +70,14 @@ public class BlobDeduplicationGCModule extends AbstractModule { @Singleton @Provides public BlobId.Factory generationAwareBlobIdFactory(Clock clock, PlainBlobId.Factory delegate, GenerationAwareBlobId.Configuration configuration) { - return new GenerationAwareBlobId.Factory(clock, delegate, configuration); + String property = System.getProperty("james.s3.minio.compatibility.mode"); + boolean compatibilityModeActivated = Optional.ofNullable(property).map(Boolean::parseBoolean).orElse(false); + + if (compatibilityModeActivated) { + return new MinIOGenerationAwareBlobId.Factory(clock, configuration, delegate); + } else { + return new GenerationAwareBlobId.Factory(clock, delegate, configuration); + } } @Singleton --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
