Hi Naoto and everyone,
There are almost as many occurrences of Locale.ROOT as Locale.US which
made me wonder which one is more appropriately locale-independent and
which is probably another topic and not actually relevant here because
BreakIterator.getCharacterInstance is locale-agnostic as far as I can
tell.
See attached patch with another attempt to fix bug 6202130.
Regards,
Philipp
On Tue, 2020-03-10 at 10:45 -0700, [email protected] wrote:
> Hi Philipp,
>
> ..., so using BreakIterator (with
> Locale.US) is more preferred solution to me.
>
> Naoto
diff -r 84215fa115fc src/java.base/share/classes/java/util/jar/Attributes.java
--- a/src/java.base/share/classes/java/util/jar/Attributes.java Fri Mar 20 17:37:52 2020 -0700
+++ b/src/java.base/share/classes/java/util/jar/Attributes.java Sat Mar 21 13:03:49 2020 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -301,7 +301,6 @@
/*
* Writes the current attributes to the specified data output stream.
- * XXX Need to handle UTF8 values and break up lines longer than 72 bytes
*/
void write(DataOutputStream out) throws IOException {
StringBuilder buffer = new StringBuilder(72);
@@ -319,8 +318,6 @@
* Writes the current attributes to the specified data output stream,
* make sure to write out the MANIFEST_VERSION or SIGNATURE_VERSION
* attributes first.
- *
- * XXX Need to handle UTF8 values and break up lines longer than 72 bytes
*/
void writeMain(DataOutputStream out) throws IOException {
StringBuilder buffer = new StringBuilder(72);
diff -r 84215fa115fc src/java.base/share/classes/java/util/jar/Manifest.java
--- a/src/java.base/share/classes/java/util/jar/Manifest.java Fri Mar 20 17:37:52 2020 -0700
+++ b/src/java.base/share/classes/java/util/jar/Manifest.java Sat Mar 21 13:03:49 2020 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 1997, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 1997, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -30,6 +30,8 @@
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
+import java.text.BreakIterator;
+import java.util.Locale;
import java.util.HashMap;
import java.util.Map;
@@ -230,30 +232,89 @@
}
/**
- * Writes {@code line} to {@code out} with line breaks and continuation
- * spaces within the limits of 72 bytes of contents per line followed
- * by a line break.
+ * Writes {@code line} to {@code out} with at most 72 bytes per line.
+ * Byte sequences of characters and grapheme clusters encoded in UTF-8
+ * with less than 72 bytes are not interrupted with line breaks.
*/
static void println72(OutputStream out, String line) throws IOException {
- if (!line.isEmpty()) {
- byte[] lineBytes = line.getBytes(UTF_8.INSTANCE);
- int length = lineBytes.length;
- // first line can hold one byte more than subsequent lines which
- // start with a continuation line break space
- out.write(lineBytes[0]);
- int pos = 1;
- while (length - pos > 71) {
- out.write(lineBytes, pos, 71);
- pos += 71;
- println(out);
- out.write(' ');
- }
- out.write(lineBytes, pos, length - pos);
+ int linePos = 0; // number of bytes already put out on current line
+ BreakIterator characterBoundaryIterator =
+ BreakIterator.getCharacterInstance(Locale.US);
+ characterBoundaryIterator.setText(line);
+ int start = characterBoundaryIterator.first(), end;
+ while ((end = characterBoundaryIterator.next()) != BreakIterator.DONE) {
+ String characterString = line.substring(start, end);
+ start = end;
+ linePos = printChar72(out, linePos, characterString);
}
println(out);
}
/**
+ * Writes {@code characterString} in one piece on the current line or on a
+ * new continuation line after a line break unless it exceeds 71 bytes in
+ * its UTF-8 encoded form. More than 71 bytes cannot fit in a manifest line
+ * and will be broken across line breaks, reverting to respecting code point
+ * boundaries.
+ */
+ private static int printChar72(OutputStream out, int linePos,
+ String characterString) throws IOException {
+ // characterString contains one grapheme cluster consisting of an
+ // unlimited number of primitive chars.
+ byte[] characterBytes = characterString.getBytes(UTF_8.INSTANCE);
+ int characterLength = characterBytes.length;
+ int characterPos = 0; // number of bytes of current character
+ // already put out
+
+ // Put out a break onto a new line if the character or rather grapheme
+ // cluster does not fit on the current line anymore but fits on a new
+ // line.
+ // In other words, only if the current grapheme cluster exceeds 71 bytes
+ // in its UTF-8 encoded form and therefore does not fit on one whole
+ // continuation line alone, skip this line break and fill the current
+ // line first before putting a break inside of the grapheme cluster.
+ if (linePos + characterLength > 72 && characterLength < 72) {
+ println(out);
+ out.write(' ');
+ linePos = 1;
+ }
+
+ // Break exceptionally large grapheme clusters that don't fit on one
+ // line at code point boundaries.
+ while (linePos + characterLength - characterPos > 72) {
+ int nextBreakPos = 72 - linePos;
+ while (isContinuationByte(
+ characterBytes[characterPos + nextBreakPos])) {
+ nextBreakPos--;
+ }
+ out.write(characterBytes, characterPos, nextBreakPos);
+ characterPos += nextBreakPos;
+ println(out);
+ out.write(' ');
+ linePos = 1;
+ }
+
+ int remainder = characterLength - characterPos;
+ out.write(characterBytes, characterPos, remainder);
+ linePos += remainder;
+ return linePos;
+ }
+
+ /**
+ * Returns {@code true} if the passed byte as parameter {@code b}
+ * is not the first (or only) byte of a Unicode character encoded in UTF-8
+ * and {@code false} otherwise.
+ *
+ * @see <a href="https://tools.ietf.org/html/rfc3629#section-3">
+ * RFC 3629 - UTF-8, a transformation format of ISO 10646</a>
+ * @see StringCoding#isNotContinuation(int)
+ * @see sun.nio.cs.UTF_8.Decoder#isNotContinuation(int)
+ */
+ private static boolean isContinuationByte(byte b) {
+ return (b & 0xc0) == 0x80;
+ }
+
+ /**
* Writes a line break to {@code out}.
*/
static void println(OutputStream out) throws IOException {
diff -r 84215fa115fc test/jdk/java/util/jar/Manifest/PrintChar72.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/jar/Manifest/PrintChar72.java Sat Mar 21 13:03:49 2020 +0100
@@ -0,0 +1,482 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.DataOutputStream;
+import java.io.IOException;
+import java.util.jar.Attributes;
+import java.util.jar.Manifest;
+import java.util.jar.Attributes.Name;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.stream.IntStream;
+import java.util.stream.Stream;
+
+import org.testng.annotations.*;
+import static org.testng.Assert.*;
+
+/**
+ * @test
+ * @bug 6202130
+ * @compile ../../../../sun/security/tools/jarsigner/Utils.java
+ * @run testng PrintChar72
+ * @summary Tests {@link Manifest#printChar72} breaking manifest header values
+ * across lines in conjunction with Unicode characters encoded in UTF-8 with a
+ * variable number of bytes when reading and writing jar manifests results in
+ * valid UTF-8.
+ * <p>
+ * The manifest line length limit (72 bytes) may be reached at a position
+ * between multiple bytes of a single UTF-8 encoded character or grapheme
+ * cluster. Although grapheme clusters should not be broken across lines
+ * according to the specification the previous Manifest implementation did.
+ * <p>
+ * This test makes sure that no Unicode code point character is broken apart
+ * across a line break when writing manifests and also that manifests are still
+ * read correctly whether or not characters encoded in UTF-8 with more than one
+ * byte are interrupted with and continued after a line break for compatibility
+ * when reading older manifests.
+ * <p>
+ * For cases with combining character sequences aka grapheme clusters see
+ * {@link PrintLine72}.
+ */
+public class PrintChar72 {
+
+ static final int MANIFEST_LINE_CONTENT_WIDTH_BYTES = 72;
+
+ /**
+ * Character string that has one byte size in its UTF-8 encoded form to
+ * yield one byte of position offset.
+ */
+ static final String FILL1BYTE = "x";
+ static final String MARK_BEFORE = "y";
+ static final String MARK_AFTER = "z";
+
+ /**
+ * Four byte name.
+ * By using header names of four characters length the same values can be
+ * used for testing line breaks in both headers (in main attributes as well
+ * as named sections) as well as section names because a named section name
+ * is represented basically like any other header but follows an empty line
+ * and the key is always "Name".
+ * Relative to the start of the value, this way the same offset to the
+ * character to test breaking can be used in all cases.
+ */
+ static final String FOUR_BYTE_NAME = "Name";
+
+ /**
+ * Distinguishes main attributes headers, section names, and headers in
+ * named sections because an implementation might make a difference.
+ */
+ enum PositionInManifest {
+ /**
+ * @see Attributes#writeMain
+ */
+ MAIN_ATTRIBUTES,
+ /**
+ * @see Attributes#write
+ */
+ SECTION_NAME,
+ /**
+ * @see Manifest#write
+ */
+ NAMED_SECTION;
+ }
+
+ static String numByteUnicodeCharacter(int numBytes) {
+ String string;
+ switch (numBytes) {
+ case 1: string = "i"; break;
+ case 2: string = "\u00EF"; break; // small letter i with diaresis
+ case 3: string = "\uFB00"; break; // small double f ligature
+ case 4: string = Character.toString(0x2070E); break; // surrogate pair
+ default: throw new AssertionError();
+ }
+ assertEquals(string.getBytes(UTF_8).length, numBytes,
+ "self-test failed: unexpected UTF-8 encoded character length");
+ return string;
+ }
+
+ /**
+ * Produces test cases with all combinations of circumstances covered in
+ * which a character could possibly be attempted to be broken across a line
+ * break onto a continuation line:<ul>
+ * <li>different sizes of a UTF-8 encoded characters: one, two, three, and
+ * four bytes,</li>
+ * <li>all possible positions of the character to test breaking with
+ * relative respect to the 72-byte line length limit including immediately
+ * before that character and immediately after the character and every
+ * position in between for multi-byte UTF-8 encoded characters,</li>
+ * <li>different number of preceding line breaks in the same value</li>
+ * <li>at the end of the value or followed by another character</li>
+ * <li>in a main attributes header value, section name, or named section
+ * header value (see also {@link #PositionInManifest})</li>
+ * </ul>
+ * The same set of test parameters is used to write and read manifests
+ * once without breaking characters apart
+ * ({@link #testWriteLineBreaksKeepCharactersTogether(int, int, int, int,
+ * PositionInManifest, String, String)}) and once with doing so
+ * ({@link #readCharactersBrokenAcrossLines(int, int, int, int,
+ * PositionInManifest, String, String)}) the latter case of which covering
+ * backwards compatibility and involves writing manifests like they were
+ * written before resolution of bug 6202130.
+ */
+ @DataProvider(name = "lineBreakParameters")
+ public static Object[][] lineBreakParameters() {
+ return Stream.of(new Object[] { null }
+ ).flatMap(o ->
+ // b: number of line breaks before character under test
+ IntStream.rangeClosed(0, 3).mapToObj(
+ b -> new Object[] { b })
+ ).flatMap(o ->
+ // c: unicode character UTF-8 encoded length in bytes
+ IntStream.rangeClosed(1, 4).mapToObj(
+ c -> new Object[] { o[0], c })
+ ).flatMap(o ->
+ // p: potential break position offset in bytes
+ // p == 0 => before character,
+ // p == c => after character, and
+ // 0 < p < c => position within the character's byte sequence
+ IntStream.rangeClosed(0, (int) o[1]).mapToObj(
+ p -> new Object[] { o[0], o[1], p })
+ ).flatMap(o ->
+ // a: no or one character following the one under test
+ // (a == 0 meaning the character under test is the end of
+ // the value which is followed by a line break in the
+ // resulting manifest without continuation line space which
+ // concludes the value)
+ IntStream.rangeClosed(0, 1).mapToObj(
+ a -> new Object[] { o[0], o[1], o[2], a })
+ ).flatMap(o ->
+ Stream.of(PositionInManifest.values()).map(
+ i -> new Object[] { o[0], o[1], o[2], o[3], i })
+ ).map(o -> {
+ int b = (int) o[0];
+ int c = (int) o[1];
+ int p = (int) o[2];
+ int a = (int) o[3];
+ PositionInManifest i = (PositionInManifest) o[4];
+
+ // offset: so many characters (actually bytes here, because filled
+ // with one byte characters) are needed to place the next character
+ // (the character under test) into a position relative to the
+ // maximum line width that it may or may not have to be broken onto
+ // the next line
+ int offset =
+ // number of lines; - 1 due to continuation " "
+ b * (MANIFEST_LINE_CONTENT_WIDTH_BYTES - 1)
+ // first line available content space (after "Name: ")
+ + MANIFEST_LINE_CONTENT_WIDTH_BYTES - 6
+ // position of line width limit relative to tested character
+ - p;
+
+ String value = "";
+ value += FILL1BYTE.repeat(offset - 1);
+ // character before the one to test the break
+ value += MARK_BEFORE;
+ String character = numByteUnicodeCharacter(c);
+ value += character;
+ // (optional) character after the one to test the break
+ value += MARK_AFTER.repeat(a);
+
+ return new Object[] { b, c, p, a, i, character, value };
+ }).toArray(size -> new Object[size][]);
+ }
+
+ /**
+ * Checks that unicode characters work well with line breaks and
+ * continuation lines in jar manifests without breaking a character across
+ * a line break even when encoded in UTF-8 with more than one byte.
+ * <p>
+ * For each of the cases provided by {@link #lineBreakParameters()} the
+ * break position is verified in the written manifest binary form as well
+ * as verified that it restores to the original values when read again.
+ * <p>
+ * As an additional check, the binary manifests are decoded from UTF-8
+ * into strings before re-joining continued lines.
+ */
+ @Test(dataProvider = "lineBreakParameters")
+ public void testWriteLineBreaksKeepCharactersTogether(int b, int c, int p,
+ int a, PositionInManifest i, String character, String value)
+ throws Exception {
+ // in order to unambiguously establish the position of "character" in
+ // brokenPart, brokenPart is prepended and appended with what is
+ // expected before and after it...
+ String brokenPart = MARK_BEFORE;
+
+ // expect the whole character on the next line unless it fits
+ // completely on the current line
+ boolean breakExpectedBeforeCharacter = p < c;
+ if (breakExpectedBeforeCharacter) {
+ brokenPart += "\r\n ";
+ }
+ brokenPart += character;
+ // expect a line break before the next character if there is a next
+ // character and the previous not already broken on next line
+ if (a > 0) {
+ if (!breakExpectedBeforeCharacter) {
+ brokenPart += "\r\n ";
+ }
+ brokenPart += MARK_AFTER;
+ }
+ brokenPart = brokenPart + "\r\n";
+
+ byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, value);
+ try {
+ assertOccurrence(mfBytes, brokenPart.getBytes(UTF_8));
+ readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value);
+ decodeManifestAsUtf8AndAssertValue(
+ mfBytes, FOUR_BYTE_NAME, value, true);
+ } catch (AssertionError e) {
+ Utils.echoManifest(mfBytes, "faulty manifest: " + e);
+ throw e;
+ }
+ }
+
+ static byte[] writeManifest(PositionInManifest i, String name,
+ String value) throws IOException {
+ Manifest mf = new Manifest();
+ mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
+
+ switch (i) {
+ case MAIN_ATTRIBUTES:
+ mf.getMainAttributes().put(new Name(name), value);
+ break;
+ case SECTION_NAME:
+ mf.getEntries().put(value, new Attributes());
+ break;
+ case NAMED_SECTION:
+ Attributes attributes = new Attributes();
+ mf.getEntries().put(FOUR_BYTE_NAME, attributes);
+ attributes.put(new Name(name), value);
+ break;
+ }
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ mf.write(out);
+ return out.toByteArray();
+ }
+
+ /**
+ * Asserts one and only one occurrence of a sequence of bytes {@code part}
+ * representing the character and how it is expected to be broken and its
+ * surrounding bytes in a larger sequence that corresponds to the manifest
+ * in binary form {@code mf}.
+ */
+ static void assertOccurrence(byte[] mf, byte[] part) {
+ List<Integer> matchPos = new LinkedList<>();
+ for (int i = 0; i < mf.length; i++) {
+ for (int j = 0; j < part.length && i + j <= mf.length; j++) {
+ if (part[j] == 0) {
+ if (i + j != mf.length) {
+ break; // expected eof not found
+ }
+ } else if (i + j == mf.length) {
+ break;
+ } else if (mf[i + j] != part[j]) {
+ break;
+ }
+ if (j == part.length - 1) {
+ matchPos.add(i);
+ }
+ }
+ }
+ assertEquals(matchPos.size(), 1, "not "
+ + (matchPos.size() < 1 ? "found" : "unique") + ": '"
+ + new String(part, UTF_8) + "'");
+ }
+
+ static void readManifestAndAssertValue(
+ byte[] mfBytes, PositionInManifest i, String name, String value)
+ throws IOException {
+ Manifest mf = new Manifest(new ByteArrayInputStream(mfBytes));
+
+ switch (i) {
+ case MAIN_ATTRIBUTES:
+ assertEquals(mf.getMainAttributes().getValue(name), value,
+ "main attributes header value");
+ break;
+ case SECTION_NAME:
+ Attributes attributes = mf.getAttributes(value);
+ assertNotNull(attributes, "named section not found");
+ break;
+ case NAMED_SECTION:
+ attributes = mf.getAttributes(FOUR_BYTE_NAME);
+ assertEquals(attributes.getValue(name), value,
+ "named section attributes header value");
+ break;
+ }
+ }
+
+ /**
+ * Decodes a binary manifest {@code mfBytes} into UTF-8 first, before
+ * joining the continuation lines unlike {@link Manifest} and
+ * {@link Attributes} which join the continuation lines first, before
+ * decoding the joined line from UTF-8 into a {@link String}, evaluating
+ * whether or not the binary manifest is valid UTF-8.
+ */
+ static void decodeManifestAsUtf8AndAssertValue(
+ byte[] mfBytes, String name, String value,
+ boolean validUtf8ManifestExpected) throws IOException {
+ String mf = new String(mfBytes, UTF_8)
+ .replaceAll("(\\r\\n|(?!\\r)\\n|\\r(?!\\n)) ", "");
+ String header = "\r\n" + name + ": " + value + "\r\n";
+ int pos = mf.indexOf(header);
+ if (validUtf8ManifestExpected) {
+ assertTrue(pos > 0);
+ pos = mf.indexOf(header, pos + 1);
+ }
+ // assert no ocurrence or no other occurrence after one match above
+ assertTrue(pos == -1);
+ }
+
+ @Test(dataProvider = "lineBreakParameters")
+ public void readCharactersBrokenAcrossLines(int b, int c, int p, int a,
+ PositionInManifest i, String character, String value)
+ throws Exception {
+ ByteArrayOutputStream buf = new ByteArrayOutputStream();
+ buf.write(MARK_BEFORE.getBytes(UTF_8));
+ byte[] characterBytes = character.getBytes(UTF_8);
+ // the portion of the character that fits on the current line before
+ // a break at 72 bytes, ranges from nothing (p == 0) to the whole
+ // character (p == c)
+ for (int j = 0; j < p; j++) {
+ buf.write(characterBytes, j, 1);
+ }
+ // expect a line break at exactly 72 bytes from the beginning of the
+ // line unless the whole character fits on that line
+ boolean breakExpectedBeforeCharacter = p < c;
+ if (breakExpectedBeforeCharacter) {
+ buf.write("\r\n ".getBytes(UTF_8));
+ }
+ // the remaining portion of the character, if any
+ for (int j = p; j < c; j++) {
+ buf.write(characterBytes, j, 1);
+ }
+ // expect another line break if the whole character fitted on the same
+ // line and there is another character
+ if (a == 1) {
+ if (c == p) {
+ buf.write("\r\n ".getBytes(UTF_8));
+ }
+ buf.write(MARK_AFTER.getBytes(UTF_8));
+ }
+ // expect a line break immediately after the value
+ buf.write("\r\n".getBytes(UTF_8));
+ byte[] brokenPart = buf.toByteArray();
+
+ byte[] mfBytes = writeManifestWithBrokenCharacters(
+ i, FOUR_BYTE_NAME, value);
+ try {
+ assertOccurrence(mfBytes, brokenPart);
+ readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, value);
+ decodeManifestAsUtf8AndAssertValue(
+ mfBytes, FOUR_BYTE_NAME, value, p == 0 || p == c);
+ } catch (AssertionError e) {
+ Utils.echoManifest(mfBytes, "faulty manifest: " + e);
+ throw e;
+ }
+ }
+
+ /**
+ * From the previous {@link Manifest} implementation reduced to the minimum
+ * required to demonstrate compatibility.
+ */
+ @SuppressWarnings("deprecation")
+ static byte[] writeManifestWithBrokenCharacters(
+ PositionInManifest i, String name, String value)
+ throws IOException {
+ byte[] vb = value.getBytes(UTF_8);
+ value = new String(vb, 0, 0, vb.length);
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ DataOutputStream dos = new DataOutputStream(out);
+ dos.writeBytes(Name.MANIFEST_VERSION + ": 0.1\r\n");
+
+ if (i == PositionInManifest.MAIN_ATTRIBUTES) {
+ StringBuffer buffer = new StringBuffer(name);
+ buffer.append(": ");
+ buffer.append(value);
+ make72Safe(buffer);
+ buffer.append("\r\n");
+ dos.writeBytes(buffer.toString());
+ }
+ dos.writeBytes("\r\n");
+
+ if (i == PositionInManifest.SECTION_NAME ||
+ i == PositionInManifest.NAMED_SECTION) {
+ StringBuffer buffer = new StringBuffer("Name: ");
+ if (i == PositionInManifest.SECTION_NAME) {
+ buffer.append(value);
+ } else {
+ buffer.append(FOUR_BYTE_NAME);
+ }
+ make72Safe(buffer);
+ buffer.append("\r\n");
+ dos.writeBytes(buffer.toString());
+
+ if (i == PositionInManifest.NAMED_SECTION) {
+ buffer = new StringBuffer(name);
+ buffer.append(": ");
+ buffer.append(value);
+ make72Safe(buffer);
+ buffer.append("\r\n");
+ dos.writeBytes(buffer.toString());
+ }
+
+ dos.writeBytes("\r\n");
+ }
+
+ dos.flush();
+ return out.toByteArray();
+ }
+
+ /**
+ * Adds line breaks to enforce a maximum 72 bytes per line.
+ * <p>
+ * From previous Manifest implementation without respect for UTF-8 encoded
+ * character boundaries breaking also within multi-byte UTF-8 encoded
+ * characters.
+ *
+ * @see {@link Manifest#make72Safe}
+ */
+ static void make72Safe(StringBuffer line) {
+ int length = line.length();
+ int index = 72;
+ while (index < length) {
+ line.insert(index, "\r\n ");
+ index += 74; // + line width + line break ("\r\n")
+ length += 3; // + line break ("\r\n") and space
+ }
+ }
+
+ @Test
+ public void testEmptyValue() throws Exception {
+ for (PositionInManifest i : PositionInManifest.values()) {
+ byte[] mfBytes = writeManifest(i, FOUR_BYTE_NAME, "");
+ readManifestAndAssertValue(mfBytes, i, FOUR_BYTE_NAME, "");
+ }
+ }
+
+}
diff -r 84215fa115fc test/jdk/java/util/jar/Manifest/PrintLine72.java
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/test/jdk/java/util/jar/Manifest/PrintLine72.java Sat Mar 21 13:03:49 2020 +0100
@@ -0,0 +1,237 @@
+/*
+ * Copyright (c) 2019, Oracle and/or its affiliates. All rights reserved.
+ * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
+ *
+ * This code is free software; you can redistribute it and/or modify it
+ * under the terms of the GNU General Public License version 2 only, as
+ * published by the Free Software Foundation.
+ *
+ * This code is distributed in the hope that it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ * version 2 for more details (a copy is included in the LICENSE file that
+ * accompanied this code).
+ *
+ * You should have received a copy of the GNU General Public License version
+ * 2 along with this work; if not, write to the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
+ *
+ * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
+ * or visit www.oracle.com if you need additional information or have any
+ * questions.
+ */
+
+import static java.nio.charset.StandardCharsets.UTF_8;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.jar.Manifest;
+import java.util.jar.Attributes.Name;
+import java.util.List;
+import java.util.stream.Collectors;
+import java.util.stream.IntStream;
+
+import org.testng.annotations.*;
+import static org.testng.Assert.*;
+
+/**
+ * @test
+ * @compile ../../../../sun/security/tools/jarsigner/Utils.java
+ * @bug 6202130
+ * @run testng PrintLine72
+ * @summary Tests {@link Manifest#println72} line breaking with some particular
+ * border case kind of test cases involving combining character sequence
+ * grapheme clusters.
+ * <p>
+ * For another test covering the complete Unicode character set see
+ * {@link ValueUtf8Coding} and for a test for not breaking Unicode code point
+ * UTF-8 encoded byte sequences see {@link PrintChar72}.
+ */
+public class PrintLine72 {
+
+ static final Name TEST_NAME = new Name("test");
+
+ static final int NAME_SEP_LENGTH = (TEST_NAME + ": ").length();
+
+ void test(String originalValue, int... breakPositionsBytes)
+ throws IOException {
+ String expectedValueWithBreaksInManifest = originalValue;
+ // iterating backwards because inserting breaks affects positions
+ for (int i = breakPositionsBytes.length - 1; i >= 0; i--) {
+ int breakPositionBytes = breakPositionsBytes[i];
+
+ // Translate breakPositionBytes byte offset into
+ // breakPositionCharacters (primitive char type) character offset
+ // for cutting the string with String.substring lateron.
+ // Higher code points may be represented with two UTF-16 surrogate
+ // pair characters which both count for String.substring.
+ int bytesSoFar = 0;
+ int charsSoFar = 0;
+ while (bytesSoFar < breakPositionBytes) {
+ bytesSoFar += expectedValueWithBreaksInManifest
+ .substring(charsSoFar, charsSoFar + 1)
+ .getBytes(UTF_8).length;
+ charsSoFar++;
+ assertTrue(bytesSoFar <= breakPositionBytes,
+ "break position not aligned with characters");
+ }
+ int breakPositionCharacters = charsSoFar;
+
+ expectedValueWithBreaksInManifest =
+ expectedValueWithBreaksInManifest
+ .substring(0, breakPositionCharacters)
+ + "\r\n " +
+ expectedValueWithBreaksInManifest
+ .substring(breakPositionCharacters);
+ }
+
+ Manifest mf = new Manifest();
+ mf.getMainAttributes().put(Name.MANIFEST_VERSION, "1.0");
+ mf.getMainAttributes().put(TEST_NAME, originalValue);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ mf.write(out);
+ byte[] mfBytes = out.toByteArray();
+
+ byte[] actual = mfBytes;
+ String expected =
+ "Manifest-Version: 1.0\r\n" +
+ TEST_NAME + ": " + expectedValueWithBreaksInManifest +
+ "\r\n\r\n";
+ try {
+ assertEquals(new String(actual, UTF_8), expected);
+ assertEquals(actual, expected.getBytes(UTF_8));
+ } catch (AssertionError e) {
+ Utils.echoManifest(mfBytes, "faulty manifest: " + e);
+ System.out.println("actual = " + byteArrayToIntList(actual));
+ System.out.println("expected = " + byteArrayToIntList(
+ expected.getBytes(UTF_8)));
+ throw e;
+ }
+ }
+
+ static List<Integer> byteArrayToIntList(byte[] bytes) {
+ List<Integer> list = new ArrayList<>();
+ for (int i = 0; i < bytes.length; i++) {
+ list.add((int) bytes[i]);
+ }
+ return list;
+ }
+
+ @Test
+ public void testBreakAlignmentSelfTest() throws Exception {
+ assertThrows(AssertionError.class, () -> test(getCharSeq(2), 1));
+ assertThrows(AssertionError.class, () -> test("\uD841\uDF0E", 2));
+ }
+
+ @Test
+ public void testEmpty() throws Exception {
+ test(""); // expect neither a line break nor an exception
+ }
+
+ static final String COMBINING_DIACRITICAL_MARKS =
+ IntStream.range(0x300, 0x36F)
+ .mapToObj(i -> new String(Character.toChars(i)))
+ .collect(Collectors.joining());
+
+ static String getCharSeq(int numberOfBytes) {
+ String seq = (numberOfBytes % 2 == 1 ? "e" : "\u00E6")
+ + COMBINING_DIACRITICAL_MARKS.substring(0, (numberOfBytes - 1) / 2);
+ assertEquals(seq.getBytes(UTF_8).length, numberOfBytes);
+ return seq;
+ }
+
+ @Test
+ public void testBreakOnFirstLine() throws Exception {
+ // Combining sequence starts immediately after name and ": " and fits
+ // the remaining space in the first line. Expect no break.
+ test(getCharSeq(66));
+
+ // Combining sequence starts after name and ": " and exceeds the
+ // remaining space in the first line by one byte. Expect to break on a
+ // new line because the combining sequence fits on a continuation line
+ // which does not start with name and ": " and provides enough space.
+ test(getCharSeq(67), 0);
+
+ // Combining sequence starts after name and ": " and exceeds the
+ // remaining space in the first line but still just fits exactly on a
+ // continuation line. Expect the value to break onto a new line.
+ test(getCharSeq(71), 0);
+
+ // Combining sequence starts after name and ": " and exceeds the
+ // remaining space in the first line and neither fits on a continuation
+ // line. Expect that the first line to be filled with as many codepoints
+ // as fit on it and expect a line break onto a continuation line after
+ // 66 bytes of the first line value.
+ test(getCharSeq(72), 72 - NAME_SEP_LENGTH);
+
+ // Combining sequence starts after name and ": x" and exceeds the
+ // remaining space in the first line and neither fits on a continuation
+ // line. Expect that the first line to be filled with as many codepoints
+ // as fit on it and expect a line break onto a continuation line already
+ // after 65 bytes of the first line because the following character is
+ // a code point represented with two bytes in UTF-8 which should not
+ // be interrupted with a line break.
+ test("x" + getCharSeq(72), 72 - NAME_SEP_LENGTH - 1);
+ }
+
+ @Test
+ public void testBreakOnContinuationLine() throws Exception {
+ final int FIRST_LINE_FREE_SPACE = 72 - NAME_SEP_LENGTH; // bytes
+ final String FILL_FIRST_LINE = "x".repeat(FIRST_LINE_FREE_SPACE);
+
+ // fits on next line by skipping one byte free on current line
+ test(FILL_FIRST_LINE + "x".repeat(71 - 1) + getCharSeq(71),
+ FIRST_LINE_FREE_SPACE,
+ FIRST_LINE_FREE_SPACE + 71 - 1);
+
+ // fits on current line exactly
+ test(FILL_FIRST_LINE + "x".repeat(71) + getCharSeq(71),
+ FIRST_LINE_FREE_SPACE,
+ FIRST_LINE_FREE_SPACE + 71);
+
+ // fits on next line by inserting a line break after a line that
+ // contains only one character yet
+ test(FILL_FIRST_LINE + "x".repeat(71 + 1) + getCharSeq(71),
+ FIRST_LINE_FREE_SPACE,
+ FIRST_LINE_FREE_SPACE + 71,
+ FIRST_LINE_FREE_SPACE + 71 + 1);
+
+ // does not fit on the next line and the one byte not yet used on the
+ // current line does not hold the first code point of the combined
+ // character sequence which is a code point encoded with two bytes in
+ // UTF-8.
+ assertTrue(getCharSeq(72).substring(0, 1).getBytes(UTF_8).length == 2);
+ test(FILL_FIRST_LINE + "x".repeat(71 - 1) + getCharSeq(72),
+ FIRST_LINE_FREE_SPACE,
+ FIRST_LINE_FREE_SPACE + 71 - 1,
+ FIRST_LINE_FREE_SPACE + 71 - 1 + 71 - 1);
+
+ // would not fit on the next line alone but fits on the remaining two
+ // bytes available on the current line and the whole subsequent line.
+ test(FILL_FIRST_LINE + "x".repeat(71 - 2) + getCharSeq(72),
+ FIRST_LINE_FREE_SPACE,
+ FIRST_LINE_FREE_SPACE + 71);
+
+ // previous character filled the whole previous line completely
+ // and the combined character sequence with 72 bytes still does not fit
+ // on a single line. the last code point is a two byte one so that an
+ // unused byte is left unused on the second last line.
+ assertTrue(getCharSeq(72).length() == 36); // primitive chars
+ assertTrue(getCharSeq(72).substring(35).getBytes(UTF_8).length == 2);
+ test(FILL_FIRST_LINE + "x".repeat(71) + getCharSeq(72),
+ FIRST_LINE_FREE_SPACE,
+ FIRST_LINE_FREE_SPACE + 71,
+ FIRST_LINE_FREE_SPACE + 71 + 71 - 1);
+
+ // previous character left one byte used on the current line and 70
+ // bytes remaining available. the combining sequence can use all of
+ // these 70 bytes because after 70 bytes a new code point starts
+ test(FILL_FIRST_LINE + "x".repeat(71 + 1) + getCharSeq(72),
+ FIRST_LINE_FREE_SPACE,
+ FIRST_LINE_FREE_SPACE + 71,
+ FIRST_LINE_FREE_SPACE + 71 + 71);
+ }
+
+}
diff -r 84215fa115fc test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java
--- a/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java Fri Mar 20 17:37:52 2020 -0700
+++ b/test/jdk/sun/security/tools/jarsigner/LineBrokenMultiByteCharacter.java Sat Mar 21 13:03:49 2020 +0100
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017, 2019, Oracle and/or its affiliates. All rights reserved.
+ * Copyright (c) 2017, 2020, Oracle and/or its affiliates. All rights reserved.
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
*
* This code is free software; you can redistribute it and/or modify it
@@ -21,32 +21,49 @@
* questions.
*/
-/*
+import java.io.File;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.Arrays;
+import java.util.stream.Collectors;
+import java.util.jar.Attributes.Name;
+import java.util.jar.Manifest;
+import java.util.jar.JarFile;
+import java.util.jar.JarEntry;
+import java.util.zip.ZipFile;
+import java.util.zip.ZipEntry;
+
+import static java.util.jar.JarFile.MANIFEST_NAME;
+import static java.nio.charset.StandardCharsets.UTF_8;
+import static java.nio.file.StandardCopyOption.COPY_ATTRIBUTES;
+import static java.nio.file.StandardCopyOption.REPLACE_EXISTING;
+
+import jdk.test.lib.SecurityTools;
+import jdk.test.lib.util.JarUtils;
+
+/**
* @test
- * @bug 6695402
+ * @bug 6695402 6202130
* @summary verify signatures of jars containing classes with names
* with multi-byte unicode characters broken across lines
* @library /test/lib
*/
-
-import java.io.IOException;
-import java.io.InputStream;
-import java.nio.file.Files;
-import java.nio.file.Paths;
-import java.util.jar.JarFile;
-import java.util.jar.Attributes.Name;
-import java.util.jar.JarEntry;
-
-import static java.nio.charset.StandardCharsets.UTF_8;
-
-import jdk.test.lib.SecurityTools;
-import jdk.test.lib.util.JarUtils;
-
+/*
+ * Note to future maintainers:
+ * The case this test here (LineBrokenMultiByteCharacter) verifies has been
+ * addressed in bug 8217375 with a much more general approach than with bug
+ * 6695402 and bug 8217375 came with its own much broader set of tests so that
+ * now after resolution of bug 8217375, this test here might possibly not worth
+ * to maintain any further.
+ */
public class LineBrokenMultiByteCharacter {
/**
* this name will break across lines in MANIFEST.MF at the
- * middle of a two-byte utf-8 encoded character due to its e acute letter
+ * middle of a two-byte UTF-8 encoded character due to its e acute letter
* at its exact position.
*
* because no file with such a name exists {@link JarUtils} will add the
@@ -63,53 +80,58 @@
static final String anotherName =
"LineBrokenMultiByteCharacterA1234567890B1234567890C123456789D1234567890.class";
- static final String alias = "a";
- static final String keystoreFileName = "test.jks";
- static final String manifestFileName = "MANIFEST.MF";
+ static final String ALIAS = "A";
+ static final String KEYSTORE_FILENAME = "test.jks";
+ static final String SOME_OTHER_SIG_FILE = "META-INF/FAKE_SIG.DSA";
public static void main(String[] args) throws Exception {
prepare();
testSignJar("test.jar");
- testSignJarNoManifest("test-no-manifest.jar");
testSignJarUpdate("test-update.jar", "test-updated.jar");
}
static void prepare() throws Exception {
- SecurityTools.keytool("-keystore", keystoreFileName, "-genkeypair",
+ SecurityTools.keytool("-keystore", KEYSTORE_FILENAME, "-genkeypair",
"-keyalg", "dsa",
"-storepass", "changeit", "-keypass", "changeit", "-storetype",
- "JKS", "-alias", alias, "-dname", "CN=X", "-validity", "366")
+ "JKS", "-alias", ALIAS, "-dname", "CN=X", "-validity", "366")
.shouldHaveExitValue(0);
- Files.write(Paths.get(manifestFileName), (Name.
+ new File(MANIFEST_NAME).getParentFile().mkdirs();
+ Files.write(Paths.get(MANIFEST_NAME), (Name.
MANIFEST_VERSION.toString() + ": 1.0\r\n").getBytes(UTF_8));
+
+ // prevent jarsigner from assuming it was safe to rewrite the manifest
+ // and its line breaks assuming there were no other signatures present
+ Files.write(Paths.get(SOME_OTHER_SIG_FILE), new byte[] {});
}
static void testSignJar(String jarFileName) throws Exception {
- JarUtils.createJar(jarFileName, manifestFileName, testClassName);
- verifyJarSignature(jarFileName);
- }
-
- static void testSignJarNoManifest(String jarFileName) throws Exception {
- JarUtils.createJar(jarFileName, testClassName);
+ JarUtils.createJar(jarFileName, testClassName, SOME_OTHER_SIG_FILE);
+ createManifestEntries(jarFileName);
+ rebreakManifest72bytes(jarFileName);
verifyJarSignature(jarFileName);
}
static void testSignJarUpdate(
String initialFileName, String updatedFileName) throws Exception {
- JarUtils.createJar(initialFileName, manifestFileName, anotherName);
- SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype",
+ JarUtils.createJar(initialFileName, testClassName, anotherName,
+ SOME_OTHER_SIG_FILE);
+ createManifestEntries(initialFileName);
+ rebreakManifest72bytes(initialFileName);
+ removeJarEntry(initialFileName, testClassName);
+ SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype",
"JKS", "-storepass", "changeit", "-debug", initialFileName,
- alias).shouldHaveExitValue(0);
+ ALIAS).shouldHaveExitValue(0);
JarUtils.updateJar(initialFileName, updatedFileName, testClassName);
verifyJarSignature(updatedFileName);
}
static void verifyJarSignature(String jarFileName) throws Exception {
// actually sign the jar
- SecurityTools.jarsigner("-keystore", keystoreFileName, "-storetype",
- "JKS", "-storepass", "changeit", "-debug", jarFileName, alias)
+ SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME, "-storetype",
+ "JKS", "-storepass", "changeit", "-debug", jarFileName, ALIAS)
.shouldHaveExitValue(0);
try (
@@ -130,7 +152,7 @@
* the signature file does not even contain the desired entry at all.
*
* this relies on {@link java.util.jar.Manifest} breaking lines unaware
- * of bytes that belong to the same multi-byte utf characters.
+ * of bytes that belong to the same multi-byte UTF-8 encoded characters.
*/
static void verifyClassNameLineBroken(JarFile jar, String className)
throws IOException {
@@ -142,7 +164,7 @@
throw new AssertionError(className + " not found in manifest");
}
- JarEntry manifestEntry = jar.getJarEntry(JarFile.MANIFEST_NAME);
+ JarEntry manifestEntry = jar.getJarEntry(MANIFEST_NAME);
try (
InputStream manifestIs = jar.getInputStream(manifestEntry);
) {
@@ -159,7 +181,7 @@
}
if (bytesMatched < eAcuteBroken.length) {
throw new AssertionError("self-test failed: multi-byte "
- + "utf-8 character not broken across lines");
+ + "UTF-8 encoded character not broken across lines");
}
}
}
@@ -183,4 +205,108 @@
}
}
+ static void createManifestEntries(String jarFileName) throws Exception {
+ JarUtils.updateJarFile(Paths.get(jarFileName),
+ Paths.get("."), Paths.get(MANIFEST_NAME));
+ SecurityTools.jarsigner("-keystore", KEYSTORE_FILENAME,
+ "-storepass", "changeit", "-debug", jarFileName, ALIAS)
+ .shouldHaveExitValue(0);
+ // remove the signature files, only manifest is used
+ removeJarEntry(jarFileName,
+ "META-INF/" + ALIAS + ".SF",
+ "META-INF/" + ALIAS + ".DSA");
+ }
+
+ @SuppressWarnings("deprecation")
+ static void removeJarEntry(String jarFileName, String... entryNames)
+ throws IOException {
+ String aCopy = "swap-" + jarFileName;
+ JarUtils.updateJar(jarFileName, aCopy, Arrays.asList(entryNames)
+ .stream().collect(Collectors.toMap(e -> e, e -> false)));
+ Files.copy(Paths.get(aCopy), Paths.get(jarFileName),
+ COPY_ATTRIBUTES, REPLACE_EXISTING);
+ Files.delete(Paths.get(aCopy));
+ }
+
+ static void rebreakManifest72bytes(String jarFileName) throws Exception {
+ byte[] manifest;
+ try (ZipFile zip = new ZipFile(jarFileName)) {
+ ZipEntry zipEntry = zip.getEntry(MANIFEST_NAME);
+ manifest = zip.getInputStream(zipEntry).readAllBytes();
+ }
+ Utils.echoManifest(manifest, MANIFEST_NAME + " before re-break:");
+ byte[] manifest72 = rebreak72bytes(manifest);
+ Utils.echoManifest(manifest72, MANIFEST_NAME + " after re-break:");
+ String aCopy = "swap-" + jarFileName;
+ JarUtils.updateManifest(jarFileName, aCopy, new Manifest() { @Override
+ public void write(OutputStream out) throws IOException {
+ out.write(manifest72);
+ }
+ });
+ Files.copy(Paths.get(aCopy), Paths.get(jarFileName),
+ COPY_ATTRIBUTES, REPLACE_EXISTING);
+ Files.delete(Paths.get(aCopy));
+ }
+
+ /**
+ * Simulates a jar manifest as it would have been created by an earlier
+ * JDK by re-arranging the line break at exactly 72 bytes content thereby
+ * breaking the multi-byte UTF-8 encoded character under test like before
+ * resolution of bug 6202130.
+ * <p>
+ * The returned manifest is accepted as unmodified by
+ * {@link jdk.security.jarsigner.JarSigner#updateDigests
+ * (ZipEntry,ZipFile,MessageDigest[],Manifest)} on line 985:
+ * <pre>
+ * if (!mfDigest.equalsIgnoreCase(base64Digests[i])) {
+ * </pre>
+ * and therefore left unchanged when the jar is signed and also signature
+ * verification will check it.
+ */
+ static byte[] rebreak72bytes(byte[] mf0) {
+ byte[] mf1 = new byte[mf0.length];
+ int c0 = 0, c1 = 0; // bytes since last line start
+ for (int i0 = 0, i1 = 0; i0 < mf0.length; i0++, i1++) {
+ switch (mf0[i0]) {
+ case '\r':
+ if (i0 + 2 < mf0.length &&
+ mf0[i0 + 1] == '\n' && mf0[i0 + 2] == ' ') {
+ // skip line break
+ i0 += 2;
+ i1 -= 1;
+ } else {
+ mf1[i1] = mf0[i0];
+ c0 = c1 = 0;
+ }
+ break;
+ case '\n':
+ if (i0 + 1 < mf0.length && mf0[i0 + 1] == ' ') {
+ // skip line break
+ i0 += 1;
+ i1 -= 1;
+ } else {
+ mf1[i1] = mf0[i0];
+ c0 = c1 = 0;
+ }
+ break;
+ case ' ':
+ if (c0 == 0) {
+ continue;
+ }
+ default:
+ c0++;
+ if (c1 == 72) {
+ mf1[i1++] = '\r';
+ mf1[i1++] = '\n';
+ mf1[i1++] = ' ';
+ c1 = 1;
+ } else {
+ c1++;
+ }
+ mf1[i1] = mf0[i0];
+ }
+ }
+ return mf1;
+ }
+
}