garydgregory commented on code in PR #781:
URL: https://github.com/apache/commons-io/pull/781#discussion_r2340387161
##########
src/main/java/org/apache/commons/io/FileSystem.java:
##########
@@ -504,30 +517,201 @@ public boolean supportsDriveLetter() {
}
/**
- * Converts a candidate file name (without a path) like {@code
"filename.ext"} or {@code "filename"} to a legal file
- * name. Illegal characters in the candidate name are replaced by the
{@code replacement} character. If the file
- * name length exceeds {@link #getMaxFileNameLength()}, then the name is
truncated to
- * {@link #getMaxFileNameLength()}.
+ * Converts a candidate file name (without a path) to a legal file name.
+ *
+ * <p>Takes a file name like {@code "filename.ext"} or {@code "filename"}
and:</p>
+ * <ul>
+ * <li>replaces illegal characters by the given replacement
character</li>
+ * <li>truncates the name to {@link #getMaxFileNameLength()} if
necessary</li>
+ * </ul>
*
* @param candidate
- * a candidate file name (without a path) like {@code
"filename.ext"} or {@code "filename"}
+ * A candidate file name (without a path) like {@code
"filename.ext"} or {@code "filename"}
* @param replacement
* Illegal characters in the candidate name are replaced by
this character
* @return a String without illegal characters
*/
public String toLegalFileName(final String candidate, final char
replacement) {
+ return toLegalFileName(candidate, replacement,
Charset.defaultCharset());
+ }
+
+ /**
+ * Converts a candidate file name (without a path) to a legal file name.
+ *
+ * <p>Takes a file name like {@code "filename.ext"} or {@code "filename"}
and:</p>
+ * <ul>
+ * <li>replaces illegal characters by the given replacement
character</li>
+ * <li>truncates the name to {@link #getMaxFileNameLength()} if
necessary</li>
+ * </ul>
+ *
+ * @param candidate
+ * A candidate file name (without a path) like {@code
"filename.ext"} or {@code "filename"}
+ * @param replacement
+ * Illegal characters in the candidate name are replaced by
this character
+ * @param charset
+ * The charset to use when the file name length is measured in
bytes
+ * @return a String without illegal characters
+ * @since 2.21.0
+ */
+ public String toLegalFileName(final String candidate, final char
replacement, final Charset charset) {
+ Objects.requireNonNull(candidate, "candidate");
+ if (candidate.isEmpty()) {
+ throw new IllegalArgumentException("The candidate file name is
empty");
+ }
if (isIllegalFileNameChar(replacement)) {
// %s does not work properly with NUL
throw new IllegalArgumentException(String.format("The replacement
character '%s' cannot be one of the %s illegal characters: %s",
replacement == '\0' ? "\\0" : replacement, name(),
Arrays.toString(illegalFileNameChars)));
}
- final String truncated = candidate.length() > maxFileNameLength ?
candidate.substring(0, maxFileNameLength) : candidate;
+ final CharSequence truncated = nameLengthStrategy.truncate(candidate,
getMaxFileNameLength(), charset);
final int[] array = truncated.chars().map(i ->
isIllegalFileNameChar(i) ? replacement : i).toArray();
return new String(array, 0, array.length);
}
CharSequence trimExtension(final CharSequence cs) {
- final int index = indexOf(cs, '.', 0);
- return index < 0 ? cs : cs.subSequence(0, index);
+ final int index = indexOfFirstDot(cs);
+ // An initial dot is not an extension
+ return index < 1 ? cs : cs.subSequence(0, index);
}
+
+ /**
+ * Strategy for measuring and truncating file or path names in different
units.
+ * Implementations measure length and can truncate to a specified limit.
+ */
+ enum NameLengthStrategy {
+ /** Length measured as encoded bytes. */
+ BYTES {
+ @Override
+ int getLength(final CharSequence value, final Charset charset) {
+ final CharsetEncoder enc = charset.newEncoder()
+ .onMalformedInput(CodingErrorAction.REPORT)
+ .onUnmappableCharacter(CodingErrorAction.REPORT);
+ try {
+ return enc.encode(CharBuffer.wrap(value)).remaining();
+ } catch (CharacterCodingException e) {
+ // Unencodable, does not fit any byte limit.
+ return Integer.MAX_VALUE;
+ }
+ }
+
+ @Override
+ CharSequence truncate(final CharSequence value, final int limit,
final Charset charset) {
Review Comment:
We are probably biting off more than we can chew here, considering issues
like https://issues.apache.org/jira/projects/LANG/issues/LANG-1770. Maybe we
should do a "hard" truncate and document the possible gotcha in the Javadoc.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]