A first attempt to create a tool to categorize license headers.
Project: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/commit/aaa4ac02 Tree: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/tree/aaa4ac02 Diff: http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/diff/aaa4ac02 Branch: refs/heads/master Commit: aaa4ac02885268d59de82d7a21bef0f41e3e26c6 Parents: 3cdd16d Author: Jan Lahoda <[email protected]> Authored: Sat Sep 9 10:38:48 2017 +0200 Committer: Jan Lahoda <[email protected]> Committed: Sat Sep 9 10:38:48 2017 +0200 ---------------------------------------------------------------------- .gitignore | 4 +- convert/nbproject/project.properties | 6 +- convert/src/convert/CategorizeLicenses.java | 174 +++++++++++++++++++++++ 3 files changed, 181 insertions(+), 3 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/.gitignore ---------------------------------------------------------------------- diff --git a/.gitignore b/.gitignore index 34f5785..f4a9236 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,3 @@ -/convert/nbproject/private/ \ No newline at end of file +/convert/nbproject/private/ +/convert/build/ +/convert/dist/ \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/convert/nbproject/project.properties ---------------------------------------------------------------------- diff --git a/convert/nbproject/project.properties b/convert/nbproject/project.properties index 0227ea7..4b111c8 100644 --- a/convert/nbproject/project.properties +++ b/convert/nbproject/project.properties @@ -1,9 +1,10 @@ annotation.processing.enabled=true annotation.processing.enabled.in.editor=false -annotation.processing.processor.options= annotation.processing.processors.list= annotation.processing.run.all.processors=true 
annotation.processing.source.output=${build.generated.sources.dir}/ap-source-output +application.title=convert +application.vendor=lahvac build.classes.dir=${build.dir}/classes build.classes.excludes=**/*.java,**/*.form # This directory is removed when the project is cleaned: @@ -32,6 +33,7 @@ dist.jar=${dist.dir}/convert.jar dist.javadoc.dir=${dist.dir}/javadoc dist.jlink.dir=${dist.dir}/jlink dist.jlink.output=${dist.jlink.dir}/convert +endorsed.classpath= excludes= includes=** jar.compress=false @@ -71,7 +73,7 @@ jlink.additionalmodules= jlink.additionalparam= jlink.launcher=true jlink.launcher.name=convert -main.class= +main.class=convert.CategorizeLicenses manifest.file=manifest.mf meta.inf.dir=${src.dir}/META-INF mkdist.disabled=false http://git-wip-us.apache.org/repos/asf/incubator-netbeans-tools/blob/aaa4ac02/convert/src/convert/CategorizeLicenses.java ---------------------------------------------------------------------- diff --git a/convert/src/convert/CategorizeLicenses.java b/convert/src/convert/CategorizeLicenses.java new file mode 100644 index 0000000..ee7d5c1 --- /dev/null +++ b/convert/src/convert/CategorizeLicenses.java @@ -0,0 +1,174 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
package convert;

import java.io.IOException;
import java.io.Writer;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import java.util.stream.Collectors;
import java.util.stream.Stream;

/**
 * Command-line tool that groups the files of a source tree by the text of
 * their CDDL license header.
 *
 * <p>Every regular file below the source directory is read. Files that do not
 * contain the string {@code CDDL} are listed separately, as are files that
 * contain it but whose leading comment could not be extracted. For the
 * remaining files the leading comment is normalized (years replaced by
 * {@code <YEARS>}, wrapped lines joined, blanks collapsed) and the files are
 * grouped both by the whole normalized header and by each of its paragraphs.
 */
public class CategorizeLicenses {

    /** A year such as {@code 1997} or a pair such as {@code 1997-2010}, {@code 1997, 2010}. */
    private static final Pattern YEARS_PATTERN = Pattern.compile("[12][019][0-9][0-9]([ \t]*[-,/][ \t]*[12][019][0-9][0-9])?");

    /**
     * A line break with a non-break character on both sides, i.e. a wrapped line
     * inside a paragraph. Lookarounds are used so that the neighbouring characters
     * are not consumed and consecutive one-character lines are all joined.
     */
    private static final Pattern SINGLE_LINE_BREAK = Pattern.compile("(?<=[^\n])\n(?=[^\n])");

    /** A run of spaces and tabs. */
    private static final Pattern BLANKS = Pattern.compile("[ \t]+");

    private static final Pattern C_COMMENT_START = Pattern.compile("/\\*+");
    private static final Pattern C_COMMENT_END = Pattern.compile("\\*+/");
    private static final Pattern C_LINE_PREFIX = Pattern.compile("^[ \t]*\\**[ \t]*");

    private static final Pattern XML_COMMENT_START = Pattern.compile("<!--+");
    private static final Pattern XML_COMMENT_END = Pattern.compile("-+->");
    private static final Pattern XML_LINE_PREFIX = Pattern.compile("^[ \t]*");

    private static final Pattern SHEBANG = Pattern.compile("#!.*");

    /**
     * Scans the source directory and writes the categorization into the target
     * directory.
     *
     * <p>Written files: {@code lic<N>} (one per distinct normalized header),
     * {@code par<N>} (one per distinct paragraph),
     * {@code have-cddl-not-recognized-filetype0} and {@code do-not-have-cddl0}.
     * Each file holds a description or the text itself, an empty line, the line
     * {@code files:} and the sorted relative paths of the matching files.
     * Summary counts are printed to standard error.
     *
     * @param args exactly two elements: the source directory and the target
     *             directory (created when it does not exist yet)
     * @throws IOException if the source tree cannot be listed or a result file
     *                     cannot be written; a single unreadable source file is
     *                     only reported and skipped
     */
    public static void main(String[] args) throws IOException {
        if (args.length != 2) {
            System.err.println("Use: CategorizeLicenses <source-directory> <target-directory>");
            return;
        }
        Path root = Paths.get(args[0]);
        Path target = Paths.get(args[1]);

        Map<String, List<String>> licenses = new HashMap<>();
        Map<String, List<String>> paragraphs = new HashMap<>();
        Set<String> noCDDL = new HashSet<>();
        Set<String> cddlNotRecognized = new HashSet<>();

        // Files.find keeps directory handles open until the stream is closed.
        List<Path> files;
        try (Stream<Path> found = Files.find(root, Integer.MAX_VALUE, (p, attr) -> attr.isRegularFile())) {
            files = found.collect(Collectors.toList());
        }

        for (Path file : files) {
            String path = root.relativize(file).toString();
            String code;
            try {
                code = readNormalized(file);
            } catch (IOException ex) {
                // Best effort: report the file and continue with the others.
                System.err.println("cannot read " + file + ": " + ex);
                continue;
            }

            if (!code.contains("CDDL")) {
                noCDDL.add(path);
                continue;
            }

            String lic = snipLicense(code, file);
            if (lic == null || !lic.contains("CDDL")) {
                cddlNotRecognized.add(path);
                continue;
            }

            lic = normalizeLicense(lic);
            licenses.computeIfAbsent(lic, k -> new ArrayList<>()).add(path);
            for (String par : lic.split("\n")) {
                // Splitting on the blank line between two paragraphs yields an
                // empty string, which is not a paragraph.
                if (!par.isEmpty()) {
                    paragraphs.computeIfAbsent(par, k -> new ArrayList<>()).add(path);
                }
            }
        }

        System.err.println("licenses count: " + licenses.size());
        System.err.println("paragraphs count: " + paragraphs.size());

        System.err.println("cddl, unrecognized file: " + cddlNotRecognized.size());
        System.err.println("no cddl license: " + noCDDL.size());

        if (!Files.isDirectory(target)) {
            Files.createDirectories(target);
        }

        dump(licenses, target, "lic");
        dump(paragraphs, target, "par");
        dump(Collections.singletonMap("Files which contain string CDDL, but their comment structure is not (yet) recognized.", cddlNotRecognized), target, "have-cddl-not-recognized-filetype");
        dump(Collections.singletonMap("Files which do not contain string CDDL", noCDDL), target, "do-not-have-cddl");
    }

    /**
     * Reads a file as UTF-8 text and converts CRLF and lone CR line ends to LF,
     * so that the line-based processing below sees the same text regardless of
     * the platform the file was written on.
     */
    private static String readNormalized(Path file) throws IOException {
        String text = new String(Files.readAllBytes(file), StandardCharsets.UTF_8);
        return text.replace("\r\n", "\n").replace('\r', '\n');
    }

    /**
     * Normalizes an extracted header so that headers differing only in years,
     * line wrapping or spacing compare equal: years become {@code <YEARS>},
     * single line breaks become a space, runs of blanks become one space.
     * Empty lines (paragraph separators) are kept.
     */
    private static String normalizeLicense(String lic) {
        String res = YEARS_PATTERN.matcher(lic).replaceAll(Matcher.quoteReplacement("<YEARS>"));
        res = SINGLE_LINE_BREAK.matcher(res).replaceAll(" ");
        return BLANKS.matcher(res).replaceAll(" ");
    }

    /**
     * Writes one file per map entry into {@code target}. The files are named
     * {@code name0}, {@code name1}, ... in the iteration order of the map.
     *
     * @param cat    maps a text (written first) to the paths that belong to it
     * @param target directory to write into
     * @param name   file name prefix
     * @throws IOException if a file cannot be written
     */
    private static void dump(Map<String, ? extends Collection<String>> cat, Path target, String name) throws IOException {
        int i = 0;
        for (Map.Entry<String, ? extends Collection<String>> e : cat.entrySet()) {
            List<String> sorted = new ArrayList<>(e.getValue());
            Collections.sort(sorted);
            try (Writer w = Files.newBufferedWriter(target.resolve(name + i++), StandardCharsets.UTF_8)) {
                w.write(e.getKey());
                w.write("\n\n");
                w.write("files:\n");
                for (String file : sorted) {
                    w.write(file);
                    w.write("\n");
                }
            }
        }
    }

    /**
     * Extracts the leading comment of a file, choosing the comment syntax by
     * the file name extension.
     *
     * @return the comment text with per-line decoration removed, or
     *         {@code null} when the extension is not handled or no complete
     *         comment was found
     */
    private static String snipLicense(String code, Path file) {
        String fn = file.getFileName().toString();
        // Without a dot lastIndexOf returns -1, so the whole name is used.
        switch (fn.substring(fn.lastIndexOf('.') + 1)) {
            case "javx": case "c": case "h": case "cpp":
            case "java":
                return snipLicense(code, C_COMMENT_START, C_COMMENT_END, C_LINE_PREFIX);
            case "html": case "xsd": case "xsl": case "dtd":
            case "settings": case "wstcgrp": case "wstcref":
            case "wsgrp":
            case "xml":
                return snipLicense(code, XML_COMMENT_START, XML_COMMENT_END, XML_LINE_PREFIX);
            case "sh":
                return snipLicenseBundle(code, SHEBANG);
            case "properties":
                return snipLicenseBundle(code, null);
            default:
                return null;
        }
    }

    /**
     * Returns the text between the first match of {@code commentStart} and the
     * first match of {@code commentEnd} after it.
     *
     * @param normalizeLines removed from the start of every line of the result;
     *                       may be {@code null} to keep the lines unchanged
     * @return the comment text, or {@code null} when either delimiter is missing
     */
    private static String snipLicense(String code, Pattern commentStart, Pattern commentEnd, Pattern normalizeLines) {
        Matcher startM = commentStart.matcher(code);
        if (!startM.find()) {
            return null;
        }
        Matcher endM = commentEnd.matcher(code);
        if (!endM.find(startM.end())) {
            return null;
        }
        String lic = code.substring(startM.end(), endM.start());
        if (normalizeLines == null) {
            return lic;
        }
        return Arrays.stream(lic.split("\n"))
                     .map(line -> normalizeLines.matcher(line).replaceAll(""))
                     .collect(Collectors.joining("\n"));
    }

    /**
     * Collects the leading run of {@code #} comment lines of a file.
     *
     * @param firstLinePattern when not {@code null} and the very first line
     *                         matches it entirely, that line is skipped
     * @return the comment lines without the leading {@code #}, trimmed, each
     *         followed by a line break; empty when the file does not start
     *         with a comment
     */
    private static String snipLicenseBundle(String code, Pattern firstLinePattern) {
        StringBuilder res = new StringBuilder();
        boolean firstLine = true;
        for (String rawLine : code.split("\n")) {
            String line = rawLine.trim();
            boolean skip = firstLine && firstLinePattern != null && firstLinePattern.matcher(line).matches();
            // Only the very first line may be skipped, whether it matched or not.
            firstLine = false;
            if (skip) {
                continue;
            }
            if (!line.startsWith("#")) {
                break;
            }
            res.append(line.substring(1).trim()).append('\n');
        }
        return res.toString();
    }

}
