github-advanced-security[bot] commented on code in PR #2898: URL: https://github.com/apache/jackrabbit-oak/pull/2898#discussion_r3241445674
########## oak-benchmarks/src/main/java/org/apache/jackrabbit/oak/benchmark/SegmentCacheTarBenchmark.java: ########## @@ -0,0 +1,557 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.jackrabbit.oak.benchmark; + +import java.io.File; +import java.io.IOException; +import java.nio.file.Files; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import java.util.concurrent.ThreadLocalRandom; + +import javax.jcr.Repository; + +import org.apache.commons.io.FileUtils; +import org.apache.jackrabbit.oak.api.CommitFailedException; +import org.apache.jackrabbit.oak.api.jmx.CacheStatsMBean; +import org.apache.jackrabbit.oak.fixture.RepositoryFixture; +import org.apache.jackrabbit.oak.segment.SegmentCache.SegmentCachePolicy; +import org.apache.jackrabbit.oak.segment.SegmentId; +import org.apache.jackrabbit.oak.segment.SegmentNodeStoreBuilders; +import org.apache.jackrabbit.oak.segment.file.FileStore; +import org.apache.jackrabbit.oak.segment.file.FileStoreBuilder; +import org.apache.jackrabbit.oak.segment.file.InvalidFileStoreVersionException; +import org.apache.jackrabbit.oak.segment.file.ReadOnlyFileStore; +import org.apache.jackrabbit.oak.spi.commit.CommitInfo; +import org.apache.jackrabbit.oak.spi.commit.EmptyHook; +import org.apache.jackrabbit.oak.spi.state.NodeBuilder; + +/** + * Benchmark measuring actual wall-clock elapsed time per segment cache policy using + * real TAR file I/O. Unlike {@link SegmentCacheMemoizationBenchmark}, which uses mock + * segments (free TAR reads), cache misses here trigger actual disk reads — so a policy + * with a higher miss rate is measurably slower. + * + * <h3>Fixture note</h3> + * <p>The {@code RepositoryFixture} parameter only controls the JCR repository created by + * {@code AbstractTest} infrastructure. This benchmark creates its own {@link FileStore} in + * {@link #beforeSuite()} and always reads real TAR files, regardless of which fixture is + * passed. Use {@code Oak-MemoryNS} to avoid wasting disk space on an unused second store.</p> + * + * <h3>Access path</h3> + * <p>Every access calls {@link SegmentId#getSegment()}, which follows the full production + * chain: L1 memoization → on L1 miss: store → L2 cache → on L2 miss: loader (disk read). + * Stats decompose accesses into L1-hit%, L2-hit%, and TAR-read% (loader invocations).</p> + * + * <h3>Scenarios (all in {@code afterSuite})</h3> + * <ul> + * <li><b>Scenario 1 (Zipfian steady-state)</b> — live run driven by the AbstractTest + * timing loop; isolated per-policy elapsed time with full tier breakdown.</li> + * <li><b>Scenario 2 (drifting active set)</b> — sliding Zipfian window; Caffeine's + * W-TinyLFU admission gate rejects new-window entries (freq=0) against incumbents, + * triggering perpetual TAR-read loops. Caffeine is typically slower than Guava here.</li> + * <li><b>Scenario 3 (post-compaction cold-start)</b> — cache warmed on old-gen segments; + * traffic switches to new-gen (freq=0, LRU-cold). Per-epoch TAR% tracks warm-up speed.</li> + * </ul> + */ +public class SegmentCacheTarBenchmark extends AbstractTest { + + // ----- content generation ----- + private static final int N_NODES = 4_000; + private static final int N_BATCH = 1_000; + private static final int PROPERTY_KB = 10; + private static final int PROPERTY_BYTES = PROPERTY_KB * 1024; + + // ----- cache config: ~10 MB ≈ 40 data segments at 256 KB each ----- + private static final int CACHE_SIZE_MB = 10; + + // ----- Scenario 1: Zipfian steady-state ----- + private static final int BATCH_SIZE = Integer.getInteger("segment.batch.size", 500); + private static final int WARMUP_OPS = 5_000; + private static final int MEASURE_OPS = 50_000; + private static final double ZIPF_EXP = 1.0; + + // ----- Scenario 2: drifting active set ----- + private static final int WIDTH_2 = 100; // active window > cache capacity + private static final int DRIFT_2 = 5; // advance cursor every N ops + private static final double ZIPF_2_EXP = 0.5; // flatter → more entries compete for cache + private static final int WARMUP_2 = 20_000; + private static final int MEASURE_2 = 100_000; + private static final int EPOCH_OPS_2 = 10_000; + + // ----- Scenario 3: post-compaction cold-start ----- + private static final int WARMUP_3 = 20_000; // warm on old-gen + private static final int MEASURE_3 = 100_000; + private static final int EPOCH_OPS_3 = 10_000; + + private static final SegmentCachePolicy[] POLICIES = { + SegmentCachePolicy.CAFFEINE, + SegmentCachePolicy.CAFFEINE_WITH_EXPIRY, + SegmentCachePolicy.LIRS, + SegmentCachePolicy.GUAVA + }; + private static final String[] POLICY_NAMES = {"CAFFEINE", "CAFFEINE_WITH_EXPIRY", "LIRS", "GUAVA"}; + private static final int NUM_POLICIES = POLICIES.length; + + // ----- live-run state ----- + private File storeDir; + private int poolSize; + private double[] zipfCdf; + private ReadOnlyFileStore[] liveStores; + private SegmentId[][] liveIds; // liveIds[policy][segIdx] + private long[] liveTotalOps; // per-policy access counter for statsValues() + + @Override + public String toString() { + return "SegmentCacheTarBenchmark"; + } + + @Override + protected Repository[] createRepository(RepositoryFixture fixture) throws Exception { + return fixture.setUpCluster(1); + } + + /** + * Generates real TAR content, discovers the data-segment pool, and opens the + * per-policy live stores for the AbstractTest timing loop. + */ + @Override + protected void beforeSuite() throws Exception { + storeDir = Files.createTempDirectory("SegmentCacheTarBenchmark-").toFile(); Review Comment: ## SonarCloud / Temporary files should not be created in publicly writable directories <!--SONAR_ISSUE_KEY:AZ4miz5TrfNQ6yOhD_tx-->Make sure publicly writable directories are used safely here. <p>See more on <a href="https://sonarcloud.io/project/issues?id=org.apache.jackrabbit%3Ajackrabbit-oak&issues=AZ4miz5TrfNQ6yOhD_tx&open=AZ4miz5TrfNQ6yOhD_tx&pullRequest=2898">SonarQube Cloud</a></p> [Show more details](https://github.com/apache/jackrabbit-oak/security/code-scanning/192) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
