AlexanderAshitkin commented on code in PR #91: URL: https://github.com/apache/maven-build-cache-extension/pull/91#discussion_r1313150946
########## src/main/java/org/apache/maven/buildcache/checksum/MavenProjectInput.java: ########## @@ -633,13 +714,7 @@ private static boolean isReadable(Path entry) throws IOException { } private boolean isFilteredOutSubpath(Path path) { - Path normalized = path.normalize(); - for (Path filteredOutDir : filteredOutPaths) { - if (normalized.startsWith(filteredOutDir)) { - return true; - } - } - return false; + return inputExcludeDirectoryPathMatchers.stream().anyMatch(matcher -> matcher.stopTreeWalking(path)); Review Comment: Well, there are different semantics now and before for project-level properties. Before: declaring `<exclude.me>mydir/mysubdir</exclude.me>` exclusion was filtering out the whole subtree (through the `Path#startsWith`) Now: declaring `mydir/mysubdir` doesn't exclude anything. ``` FileSystems.getDefault().getPathMatcher("glob:dir"); System.out.println(pathMatcher.matches(Paths.get("dir/file.txt")));` -----OUT----- false ``` This is what I meant - properties might assume the exclusion of directories. For consistency, we must update docs, keep the path semantics, or migrate properties to globs/regex semantics, delegating filtering expressions to the users. But the main point is that it should be consistent with inclusion semantics (and maven in general), transparent to an end user, and deterministic. ########## src/main/java/org/apache/maven/buildcache/checksum/MavenProjectInput.java: ########## @@ -165,45 +184,94 @@ public MavenProjectInput( this.repoSystem = repoSystem; this.remoteCache = remoteCache; Properties properties = project.getProperties(); - this.dirGlob = properties.getProperty(CACHE_INPUT_GLOB_NAME, config.getDefaultGlob()); + this.defaultFilenameGlob = properties.getProperty(CACHE_INPUT_GLOB_NAME, config.getDefaultGlob()); this.processPlugins = Boolean.parseBoolean(properties.getProperty(CACHE_PROCESS_PLUGINS, config.isProcessPlugins())); this.tmpDir = System.getProperty("java.io.tmpdir"); + this.baseDirectoryGlob = baseDirPath.toString().replace("\\", "/") + "/"; + org.apache.maven.model.Build build = project.getBuild(); - filteredOutPaths = new ArrayList<>(Arrays.asList( - normalizedPath(build.getDirectory()), // target by default - normalizedPath(build.getOutputDirectory()), - normalizedPath(build.getTestOutputDirectory()))); + addToExcludedSection( + convertToPathMatcherFileSeperator( + normalizedPath(build.getDirectory()).toString()) + + GLOB_SX_ALL_SUB_FILES, + false); // target by default + addToExcludedSection( + convertToPathMatcherFileSeperator( + normalizedPath(build.getOutputDirectory()).toString()) + + GLOB_SX_ALL_SUB_FILES, + false); // target/classes by default + addToExcludedSection( + convertToPathMatcherFileSeperator( + normalizedPath(build.getTestOutputDirectory()).toString()) + + GLOB_SX_ALL_SUB_FILES, + false); // target/test-classes by default List<Exclude> excludes = config.getGlobalExcludePaths(); for (Exclude excludePath : excludes) { - filteredOutPaths.add(Paths.get(excludePath.getValue())); + addToExcludedSection(excludePath.getValue(), true); } for (String propertyName : properties.stringPropertyNames()) { if (propertyName.startsWith(CACHE_EXCLUDE_NAME)) { String propertyValue = properties.getProperty(propertyName); - Path path = Paths.get(propertyValue); - filteredOutPaths.add(path); + addToExcludedSection(propertyValue, true); + if (LOGGER.isDebugEnabled()) { LOGGER.debug( - "Adding an excludePath from property '{}', values is '{}', path is '{}' ", - propertyName, - propertyValue, - path); + "Adding an excludePath from property '{}', value is '{}'", propertyName, propertyValue); } } } CacheUtils.debugPrintCollection( - LOGGER, - filteredOutPaths, - "List of excluded paths (checked either by fileName or by startsWith prefix)", - "Path entry"); + LOGGER, inputExcludePathMatcherString, "List of excluded glob patterns", "Pattern"); this.fileComparator = new PathIgnoringCaseComparator(); } + private String convertToPathMatcherFileSeperator(String path) { + return path.replace("\\", "/"); + } + + /** + * Add a value from the excluded section list to the directories and/or the filenames ban list. + * @param excludedValue a value from the exclude list + */ + private void addToExcludedSection(String excludedValue, boolean addProjectBaseDir) { + + String pathMatcherGlob = GLOB_PX + + + // Add the base directory to any input directly coming from user configuration + (addProjectBaseDir ? baseDirectoryGlob : "") + + + // If the glob start with "/", we remove it since it's already added in the added basedir glob + (excludedValue.startsWith("/") ? excludedValue.substring(1) : excludedValue); + + // In order to skip unnecessary subtree dir walking, we use a different PathMatcher list for "directories" or + // "files + directories" + inputExcludePathMatchers.add(new TreeWalkerPathMatcher(pathMatcherGlob, false)); + + // Globs ending with "**" should end any sub-directory inspection + if (pathMatcherGlob.endsWith("**")) { Review Comment: Well, there are different semantics now and before for project-level properties. Before: declaring `<exclude.me>mydir/mysubdir</exclude.me>` exclusion was filtering out the whole subtree (through the `Path#startsWith`) Now: declaring `mydir/mysubdir` doesn't exclude anything. ``` FileSystems.getDefault().getPathMatcher("glob:dir"); System.out.println(pathMatcher.matches(Paths.get("dir/file.txt")));` -----OUT----- false ``` For consistency, we must keep the path semantics of properties or migrate them to the globs/regex semantics, delegating filtering expressions to the users. But the main point is that it should be consistent, transparent, and deterministic. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: issues-unsubscr...@maven.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org