awcosand commented on code in PR #87: URL: https://github.com/apache/tomcat-jakartaee-migration/pull/87#discussion_r2546763939
########## src/main/java/org/apache/tomcat/jakartaee/MigrationCache.java: ########## @@ -0,0 +1,498 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.tomcat.jakartaee; + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileReader; +import java.io.FileWriter; +import java.io.IOException; +import java.nio.file.Files; +import java.security.MessageDigest; +import java.security.NoSuchAlgorithmException; +import java.time.LocalDate; +import java.time.format.DateTimeFormatter; +import java.time.format.DateTimeParseException; +import java.util.HashMap; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.UUID; +import java.util.logging.Level; +import java.util.logging.Logger; + +/** + * Cache for storing and retrieving pre-converted archive files. + * + * <h2>Cache Structure</h2> + * <p>The cache organizes files in a directory structure based on hash values:</p> + * <pre> + * {cacheDir}/ + * ├── cache-metadata.txt # Metadata file tracking access times + * ├── {XX}/ # Subdirectory named by first 2 chars of hash + * │ └── {hash}.jar # Cached converted archive (full SHA-256 hash) + * ├── {YY}/ + * │ └── {hash}.jar + * └── temp-{uuid}.tmp # Temporary files during conversion + * </pre> + * + * <h2>Cache Key</h2> + * <p>Each cache entry is keyed by a SHA-256 hash computed from:</p> + * <ul> + * <li>The migration profile name (e.g., "TOMCAT", "EE")</li> + * <li>The pre-conversion archive content (as bytes)</li> + * </ul> + * <p>This ensures that the same archive converted with different profiles + * produces different cache entries.</p> + * + * <h2>Metadata Format</h2> + * <p>The {@code cache-metadata.txt} file tracks access times for cache pruning:</p> + * <pre> + * # Migration cache metadata - hash|last_access_date + * {hash}|{YYYY-MM-DD} + * {hash}|{YYYY-MM-DD} + * </pre> + * + * <h2>Temporary Files</h2> + * <p>During conversion, output is written to temporary files named {@code temp-{uuid}.tmp}. + * These files are cleaned up on startup to handle crashes or unexpected shutdowns.</p> + */ +public class MigrationCache { + + private static final Logger logger = Logger.getLogger(MigrationCache.class.getCanonicalName()); + private static final StringManager sm = StringManager.getManager(MigrationCache.class); + private static final String METADATA_FILE = "cache-metadata.txt"; + private static final DateTimeFormatter DATE_FORMATTER = DateTimeFormatter.ISO_LOCAL_DATE; + + private final File cacheDir; + private final boolean enabled; + private final int retentionDays; + private final Map<String, LocalDate> cacheMetadata; + private final File metadataFile; + + /** + * Construct a new migration cache. + * + * @param cacheDir the directory to store cached files (null to disable caching) + * @param retentionDays the number of days to retain cached files + * @throws IOException if the cache directory cannot be created + */ + public MigrationCache(File cacheDir, int retentionDays) throws IOException { + if (cacheDir == null) { + this.cacheDir = null; + this.enabled = false; + this.retentionDays = 0; + this.cacheMetadata = new HashMap<>(); + this.metadataFile = null; + } else { + this.cacheDir = cacheDir; + this.enabled = true; + this.retentionDays = retentionDays; + this.cacheMetadata = new HashMap<>(); + this.metadataFile = new File(cacheDir, METADATA_FILE); + + // Create cache directory if it doesn't exist + if (!cacheDir.exists()) { + if (!cacheDir.mkdirs()) { + throw new IOException(sm.getString("cache.cannotCreate", cacheDir.getAbsolutePath())); + } + } + + if (!cacheDir.isDirectory()) { + throw new IOException(sm.getString("cache.notDirectory", cacheDir.getAbsolutePath())); + } + + // Load existing metadata + loadMetadata(); + + // Clean up any orphaned temp files from previous crashes + cleanupTempFiles(); + + logger.log(Level.INFO, sm.getString("cache.enabled", cacheDir.getAbsolutePath(), retentionDays)); + } + } + + /** + * Clean up any temporary files left over from previous crashes or unexpected shutdowns. + * Scans the cache directory for temp-*.tmp files and deletes them. + */ + private void cleanupTempFiles() { + File[] files = cacheDir.listFiles(); + if (files != null) { + int cleanedCount = 0; + for (File file : files) { + if (file.isFile() && file.getName().startsWith("temp-") && file.getName().endsWith(".tmp")) { + if (file.delete()) { + cleanedCount++; + logger.log(Level.FINE, sm.getString("cache.tempfile.cleaned", file.getName())); + } else { + logger.log(Level.WARNING, sm.getString("cache.tempfile.cleanFailed", file.getName())); + } + } + } + if (cleanedCount > 0) { + logger.log(Level.INFO, sm.getString("cache.tempfiles.cleaned", cleanedCount)); + } + } + } + + /** + * Load cache metadata from disk. + * Format: hash|YYYY-MM-DD + * If file doesn't exist or is corrupt, assumes all existing cached jars were accessed today. + */ + private void loadMetadata() { + LocalDate today = LocalDate.now(); + + if (!metadataFile.exists()) { + // Metadata file doesn't exist - scan cache directory and assume all files accessed today + logger.log(Level.FINE, sm.getString("cache.metadata.notFound")); + scanCacheDirectory(today); + return; + } + + try (BufferedReader reader = new BufferedReader(new FileReader(metadataFile))) { + String line; + while ((line = reader.readLine()) != null) { + line = line.trim(); + if (line.isEmpty() || line.startsWith("#")) { + continue; + } + + String[] parts = line.split("\\|"); + if (parts.length == 2) { + String hash = parts[0]; + try { + LocalDate lastAccessed = LocalDate.parse(parts[1], DATE_FORMATTER); + cacheMetadata.put(hash, lastAccessed); + } catch (DateTimeParseException e) { + logger.log(Level.WARNING, sm.getString("cache.metadata.invalidDate", line)); + } + } else { + logger.log(Level.WARNING, sm.getString("cache.metadata.invalidLine", line)); + } + } + + // Check for any cached files not in metadata and add them with today's date + Set<String> existingHashes = scanCacheDirectory(null); + for (String hash : existingHashes) { + if (!cacheMetadata.containsKey(hash)) { + cacheMetadata.put(hash, today); + } + } + + logger.log(Level.FINE, sm.getString("cache.metadata.loaded", cacheMetadata.size())); + } catch (IOException e) { + // Corrupt or unreadable - assume all cached files accessed today + logger.log(Level.WARNING, sm.getString("cache.metadata.loadError"), e); + cacheMetadata.clear(); + scanCacheDirectory(today); + } + } + + /** + * Scan cache directory for existing cache files and return their hashes. + * If accessDate is not null, adds all found hashes to metadata with that date. + * + * @param accessDate the date to use for all found files (null to not update metadata) + * @return set of hashes found in cache directory + */ + private Set<String> scanCacheDirectory(LocalDate accessDate) { + Set<String> hashes = new HashSet<>(); + + File[] subdirs = cacheDir.listFiles(); + if (subdirs != null) { + for (File subdir : subdirs) { + if (subdir.isDirectory()) { + File[] files = subdir.listFiles(); + if (files != null) { + for (File file : files) { + if (file.isFile() && file.getName().endsWith(".jar")) { + String hash = file.getName().substring(0, file.getName().length() - 4); + hashes.add(hash); + if (accessDate != null) { + cacheMetadata.put(hash, accessDate); + } + } + } + } + } + } + } + + return hashes; + } + + /** + * Check if caching is enabled. + * + * @return true if caching is enabled + */ + public boolean isEnabled() { + return enabled; + } + Review Comment: removed -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
