This is an automated email from the ASF dual-hosted git repository. rouazana pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/james-project.git
commit 0769feee3aa2c89ca70bf0ccfccf5bb03d1409be Author: Matthieu Baechler <matth...@apache.org> AuthorDate: Thu Feb 27 11:24:20 2020 +0100 JAMES-3150 Add ScalaCheck for the garbage collector --- gc-properties.adoc | 23 +++++ server/blob/blob-deduplicating/pom.xml | 106 ++++++++++++++++++++ .../src/test/scala/GCPropertiesTest.scala | 107 +++++++++++++++++++++ server/blob/pom.xml | 1 + 4 files changed, 237 insertions(+) diff --git a/gc-properties.adoc b/gc-properties.adoc new file mode 100644 index 0000000..7c69c01 --- /dev/null +++ b/gc-properties.adoc @@ -0,0 +1,23 @@ += GC properties + +1. the execution time of the GC should be linked to +the active dataset but not to the global dataset +(for scalability purposes) + +2. GC should run on the live dataset + + 2.1. GC should not delete data being referenced by a pending process or +still referenced + + 2.2. GC should be idempotent: 2 concurrent or sequential runs should +not have a different outcome than a single one + +3. GC should remove data from the underlying store + + 3.1. an unreferenced piece of data should be removed after 1 day + + 3.2. less than 10% of unreferenced data of a significant dataset +should persist after three GC executions + +4. GC should report what it does + diff --git a/server/blob/blob-deduplicating/pom.xml b/server/blob/blob-deduplicating/pom.xml new file mode 100644 index 0000000..e849535 --- /dev/null +++ b/server/blob/blob-deduplicating/pom.xml @@ -0,0 +1,106 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!-- + Licensed to the Apache Software Foundation (ASF) under one + or more contributor license agreements. See the NOTICE file + distributed with this work for additional information + regarding copyright ownership. The ASF licenses this file + to you under the Apache License, Version 2.0 (the + "License"); you may not use this file except in compliance + with the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, + software distributed under the License is distributed on an + "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + KIND, either express or implied. See the License for the + specific language governing permissions and limitations + under the License. +--> +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd"> + <modelVersion>4.0.0</modelVersion> + + <parent> + <artifactId>james-server-blob</artifactId> + <groupId>org.apache.james</groupId> + <version>3.5.0-SNAPSHOT</version> + <relativePath>../pom.xml</relativePath> + </parent> + + <artifactId>blob-deduplicating</artifactId> + <packaging>jar</packaging> + + <name>Apache James :: Server :: Blob :: Deduplicating Blob Storage</name> + <description> + An implementation of BlobStore which deduplicates the stored blobs and uses a garbage collector + to ensure their effective deletion. 
+ </description> + + <dependencies> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>blob-api</artifactId> + </dependency> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>blob-api</artifactId> + <type>test-jar</type> + <scope>test</scope> + </dependency> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>blob-memory</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>james-server-testing</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>james-server-util</artifactId> + </dependency> + <dependency> + <groupId>${james.groupId}</groupId> + <artifactId>testing-base</artifactId> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scala-lang</groupId> + <artifactId>scala-library</artifactId> + </dependency> + <dependency> + <groupId>org.scala-lang.modules</groupId> + <artifactId>scala-java8-compat_${scala.base}</artifactId> + </dependency> + <dependency> + <groupId>org.scalactic</groupId> + <artifactId>scalactic_2.13</artifactId> + <version>3.1.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalatest</groupId> + <artifactId>scalatest_2.13</artifactId> + <version>3.1.1</version> + <scope>test</scope> + </dependency> + <dependency> + <groupId>org.scalacheck</groupId> + <artifactId>scalacheck_2.13</artifactId> + <version>1.14.3</version> + <scope>test</scope> + </dependency> + </dependencies> + + <build> + <plugins> + <plugin> + <groupId>net.alchim31.maven</groupId> + <artifactId>scala-maven-plugin</artifactId> + </plugin> + </plugins> + </build> + +</project> diff --git a/server/blob/blob-deduplicating/src/test/scala/GCPropertiesTest.scala b/server/blob/blob-deduplicating/src/test/scala/GCPropertiesTest.scala new file mode 100644 index 0000000..5de3f44 --- /dev/null +++ b/server/blob/blob-deduplicating/src/test/scala/GCPropertiesTest.scala 
// Patch hunk header preserved from the original message: @@ -0,0 +1,107 @@
import org.apache.james.blob.api.{BlobId, TestBlobId}
import org.scalacheck.Gen
import org.scalatest.funsuite.AnyFunSuite

/** Identifier of a generation; [[Generators.generationsGen]] emits these in non-decreasing order. */
final case class Generation(id: Long)

/** Identifier of an iteration, as carried by a [[Report]]. */
final case class Iteration(id: Long)

/** Identifier of whatever holds a reference to a blob. */
final case class ExternalID(id: String) // TODO

/** An event of the generated history: either a [[Reference]] being added or a [[Deletion]]. */
sealed trait Event

/** Records that `externalId` references `blobId`, created during `generation`. */
final case class Reference(externalId: ExternalID, blobId: BlobId, generation: Generation) extends Event

/** Records that `reference` was deleted during `generation`. */
final case class Deletion(generation: Generation, reference: Reference) extends Event

/** Outcome reported for one iteration: the (generation, blob) pairs selected for deletion. */
final case class Report(iteration: Iteration,
                        blobsToDelete: Set[(Generation, BlobId)]
                       )

/** ScalaCheck generators producing histories of [[Reference]] / [[Deletion]] events. */
object Generators {

  /** A `Long` between 0 and 100, inclusive. */
  val smallInteger: Gen[Long] = Gen.choose(0L, 100L)

  /**
   * An infinite, non-decreasing stream of generations starting at 0.
   *
   * Each step adds 0 (weight 90), 1 (weight 9) or 2 (weight 1) to the previous id, so
   * consecutive events mostly share a generation.
   */
  val generationsGen: Gen[LazyList[Generation]] =
    Gen.infiniteLazyList(Gen.frequency((90, Gen.const(0)), (9, Gen.const(1)), (1, Gen.const(2))))
      .map(increments => increments.scanLeft(0)(_ + _))
      .map(ids => ids.map(_.toLong).map(Generation.apply))

  /** An [[Iteration]] whose id is drawn from [[smallInteger]]. */
  val iterationGen: Gen[Iteration] = smallInteger.map(Iteration.apply)

  val blobIdFactory = new TestBlobId.Factory

  /** A fresh blob id whose string form is the generation's `toString`, an underscore and a random UUID. */
  def blobIdGen(generation: Generation): Gen[BlobId] =
    Gen.uuid.map(uuid => blobIdFactory.from(s"${generation}_$uuid"))

  /** An [[ExternalID]] backed by a random UUID. */
  val externalIDGen: Gen[ExternalID] = Gen.uuid.map(uuid => ExternalID(uuid.toString))

  /** A brand new reference (fresh blob id and fresh external id) created in `generation`. */
  def referenceGen(generation: Generation): Gen[Reference] = for {
    blobId <- blobIdGen(generation)
    externalId <- externalIDGen
  } yield Reference(externalId, blobId, generation)

  /**
   * Computes the references that have not been deleted.
   *
   * The events are traversed from first to last and a reference is dropped only if a deletion
   * targeting it has ALREADY been seen. The input must therefore be ordered newest-first (the
   * order in which [[eventsGen]] accumulates events); with oldest-first input a deletion is met
   * after its reference and the reference is kept.
   */
  def existingReferences: Seq[Event] => Set[Reference] = events => {
    val noReferences = Set.empty[Reference]
    val (_, stillReferenced) = events.foldLeft((noReferences, noReferences)) {
      case ((deleted, kept), Deletion(_, reference)) => (deleted + reference, kept)
      case ((deleted, kept), reference: Reference) if deleted.contains(reference) => (deleted, kept)
      case ((deleted, kept), reference: Reference) => (deleted, kept + reference)
    }
    stillReferenced
  }

  /**
   * Picks one of the references still existing in `previousEvents` (newest-first, see
   * [[existingReferences]]) and deletes it in `generation`; yields `None` when nothing is left
   * to delete.
   */
  def deletionGen(previousEvents: Seq[Event], generation: Generation): Gen[Option[Deletion]] = {
    val persistingReferences = existingReferences(previousEvents)
    if (persistingReferences.isEmpty) {
      Gen.const(None)
    } else {
      Gen.oneOf(persistingReferences)
        .map(reference => Some(Deletion(generation, reference)))
    }
  }

  /**
   * Derives a reference from an existing one: when `reference` belongs to `generation` the same
   * blob is referenced again under a fresh external id, otherwise a brand new reference is
   * generated for `generation`.
   */
  def duplicateReferenceGen(generation: Generation, reference: Reference): Gen[Reference] = {
    if (reference.generation == generation) {
      externalIDGen.map(id => reference.copy(externalId = id))
    } else {
      referenceGen(generation)
    }
  }

  /**
   * Generates the next event given the history so far (newest-first): a brand new reference,
   * a duplicate of a previously added reference, or a deletion of a still-existing reference.
   *
   * The duplicate and deletion candidates are only offered when the history makes them
   * possible, so an empty or reference-free history yields a new reference instead of failing
   * (`Gen.oneOf` throws on an empty collection).
   */
  def eventGen(previousEvents: Seq[Event], generation: Generation): Gen[Event] = {
    val addEvents = previousEvents.collect { case reference: Reference => reference }
    val duplicateAddEventGen: Gen[Option[Reference]] =
      if (addEvents.isEmpty) {
        Gen.const(None)
      } else {
        Gen.oneOf(addEvents)
          .flatMap(randomAddEvent => duplicateReferenceGen(generation, randomAddEvent))
          .map(Some(_))
      }

    for {
      greenAddEvent <- referenceGen(generation)
      duplicateAddEvent <- duplicateAddEventGen
      deleteEvent <- deletionGen(previousEvents, generation)
      event <- Gen.oneOf(Seq[Event](greenAddEvent) ++ duplicateAddEvent ++ deleteEvent)
    } yield event
  }

  /**
   * Generates a history in chronological order: one initial reference in generation 0 followed
   * by 0 to 100 further events whose generations follow [[generationsGen]].
   */
  def eventsGen(): Gen[Seq[Event]] = for {
    nbEvents <- Gen.choose(0, 100)
    generations <- generationsGen.map(_.take(nbEvents))
    startEvent <- referenceGen(Generation(0))
    // Events are prepended while folding (newest-first), hence the final reverse.
    events <- foldM(generations, Seq[Event](startEvent))((previousEvents, generation) =>
      eventGen(previousEvents, generation).map(_ +: previousEvents))
  } yield events.reverse

  /** Left fold over `fa` whose step function is itself a generator; built on `Gen.tailRecM`. */
  def foldM[A, B](fa: LazyList[A], z: B)(f: (B, A) => Gen[B]): Gen[B] = {
    def step(in: (LazyList[A], B)): Gen[Either[(LazyList[A], B), B]] = {
      val (remaining, acc) = in
      if (remaining.isEmpty) {
        Gen.const(Right(acc))
      } else {
        f(acc, remaining.head).map(next => Left((remaining.tail, next)))
      }
    }

    Gen.tailRecM((fa, z))(step)
  }
}

class GCPropertiesTest extends AnyFunSuite {
  // Smoke test: prints one sampled history; it makes no assertion.
  test("print sample") {
    Generators.eventsGen().sample.foreach(_.foreach(println))
  }
}

/*
 * Remainder of the patch, preserved from the original message:
 *
 * diff --git a/server/blob/pom.xml b/server/blob/pom.xml
 * index 4351178..9744471 100644
 * --- a/server/blob/pom.xml
 * +++ b/server/blob/pom.xml
 * @@ -34,6 +34,7 @@
 *      <modules>
 *          <module>blob-api</module>
 * +        <module>blob-deduplicating</module>
 *          <module>blob-cassandra</module>
 *          <module>blob-common</module>
 *          <module>blob-export-api</module>
 */
--------------------------------------------------------------------- To unsubscribe, e-mail: server-dev-unsubscr...@james.apache.org For additional commands, e-mail: server-dev-h...@james.apache.org