On Thu 24-12-15 10:25:38, Andrew Gabbasov wrote: > Current implementation of udf_translate_to_linux function does not > support multi-bytes characters at all: it counts bytes while calculating > extension length, when inserting CRC inside the name it doesn't > take into account inter-character boundaries and can break into > the middle of the character. > > The most efficient way to properly support multi-bytes characters is > merging of translation operations directly into conversion function. > This can help to avoid extra passes along the string or parsing > the multi-bytes character back into unicode to find out it's length. > > Signed-off-by: Andrew Gabbasov <andrew_gabba...@mentor.com>
Looks mostly good. A few comments below. > --- > fs/udf/unicode.c | 260 > ++++++++++++++++++++++++++++++------------------------- > 1 file changed, 141 insertions(+), 119 deletions(-) > > diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c > index f1cdeac..1dc967d 100644 > --- a/fs/udf/unicode.c > +++ b/fs/udf/unicode.c > @@ -28,9 +28,6 @@ > > #include "udf_sb.h" > > -static int udf_translate_to_linux(uint8_t *, int, const uint8_t *, int, > - const uint8_t *, int); > - > static int udf_uni2char_utf8(wchar_t uni, > unsigned char *out, > int boundlen) > @@ -114,13 +111,32 @@ static int udf_char2uni_utf8(const unsigned char *in, > return u_len; > } > > +#define ILLEGAL_CHAR_MARK '_' > +#define EXT_MARK '.' > +#define CRC_MARK '#' > +#define EXT_SIZE 5 > +/* Number of chars we need to store generated CRC to make filename unique */ > +#define CRC_LEN 5 > + > static int udf_name_from_CS0(uint8_t *str_o, int str_max_len, > const uint8_t *ocu, int ocu_len, > - int (*conv_f)(wchar_t, unsigned char *, int)) > + int (*conv_f)(wchar_t, unsigned char *, int), > + int translate) > { > + uint32_t c; > uint8_t cmp_id; > int i, len; > - int str_o_len = 0; > + int u_ch; > + int firstDots = 0, needsCRC = 0, illChar; > + int ext_i_len, ext_max_len; > + int str_o_len = 0; /* Length of resulting output */ > + int ext_o_len = 0; /* Extension output length */ > + int ext_crc_len = 0; /* Extension output length if used with CRC */ > + int i_ext = -1; /* Extension position in input buffer */ > + int o_crc = 0; /* Rightmost possible output pos for CRC+ext */ > + unsigned short valueCRC; > + uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1]; > + uint8_t crc[CRC_LEN]; > > if (str_max_len <= 0) > return 0; > @@ -133,22 +149,134 @@ static int udf_name_from_CS0(uint8_t *str_o, int > str_max_len, > cmp_id = ocu[0]; > if (cmp_id != 8 && cmp_id != 16) { > memset(str_o, 0, str_max_len); > - pr_err("unknown compression code (%d) stri=%s\n", cmp_id, ocu); > + pr_err("unknown compression code (%d)\n", cmp_id); > return -EINVAL; > } > + u_ch = cmp_id >> 3; > + > + ocu++; > + ocu_len--; > + Can we just check whether ocu_len is a multiple of u_ch here and bail out if not? That would save us some rounding below and also fix possible access beyond the end of the string in case fs is corrupted. > + if (translate) { > + /* Look for extension */ > + for (i = (ocu_len & ~(u_ch - 1)) - u_ch, ext_i_len = 0; > + (i >= 0) && (ext_i_len < EXT_SIZE); > + i -= u_ch, ext_i_len++) { > + Just remove the empty line above please. > + c = ocu[i]; > + if (u_ch > 1) > + c = (c << 8) | ocu[i + 1]; > + > + if (c == EXT_MARK) { > + if (ext_i_len) > + i_ext = i; > + break; > + } > + } > + if (i_ext >= 0) { > + /* Convert extension */ > + ext_max_len = min_t(int, sizeof(ext), str_max_len); > + ext[ext_o_len++] = EXT_MARK; > + illChar = 0; > + for (i = i_ext + u_ch; i < ocu_len;) { > + Remove the empty line here please. Also how about using i += u_ch in the for() above instead of i++ in the two lines below? > + c = ocu[i++]; > + if (u_ch > 1) > + c = (c << 8) | ocu[i++]; > + > + if (c == '/' || c == 0) { > + if (illChar) > + continue; > + illChar = 1; > + needsCRC = 1; > + c = ILLEGAL_CHAR_MARK; > + } else { > + illChar = 0; > + } > + > + len = conv_f(c, &ext[ext_o_len], > + ext_max_len - ext_o_len); > + /* Valid character? */ > + if (len >= 0) { > + ext_o_len += len; > + } else { > + ext[ext_o_len++] = '?'; > + needsCRC = 1; > + } > + if ((ext_o_len + CRC_LEN) < str_max_len) > + ext_crc_len = ext_o_len; > + } > + } > + } > + > + illChar = 0; > + for (i = 0; i < ocu_len;) { > + Again, remove the empty line, i += u_ch. > + if (str_o_len >= str_max_len) { > + needsCRC = 1; > + break; > + } > + > + if (translate && (i == i_ext)) { > + if (str_o_len > (str_max_len - ext_o_len)) > + needsCRC = 1; > + break; > + } > > - for (i = 1; (i < ocu_len) && (str_o_len < str_max_len);) { > /* Expand OSTA compressed Unicode to Unicode */ > - uint32_t c = ocu[i++]; > - if (cmp_id == 16) > + c = ocu[i++]; > + if (u_ch > 1) > c = (c << 8) | ocu[i++]; > > + if (translate) { > + if ((c == '.') && (firstDots >= 0)) > + firstDots++; > + else > + firstDots = -1; > + > + if (c == '/' || c == 0) { > + if (illChar) > + continue; > + illChar = 1; > + needsCRC = 1; > + c = ILLEGAL_CHAR_MARK; > + } else { > + illChar = 0; > + } > + } > + > len = conv_f(c, &str_o[str_o_len], str_max_len - str_o_len); > /* Valid character? */ > - if (len >= 0) > + if (len >= 0) { > str_o_len += len; > - else > + } else { > str_o[str_o_len++] = '?'; > + needsCRC = 1; > + } Umm, can you try factoring out body of this loop into a helper function and using it both during extension parsing and full name parsing? Also I think that checking whether the name is '.' or '..' could be moved just into the if (translate) below where you could just directly check it like: if (str_o_len <= 2 && str_o[0] == '.' && (str_o_len == 1 || str_o[1] == '.')) > + if (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)) > + o_crc = str_o_len; > + } > + > + if (translate) { > + if ((firstDots == 1) || (firstDots == 2)) > + needsCRC = 1; > + if (needsCRC) { > + str_o_len = o_crc; > + valueCRC = crc_itu_t(0, ocu, ocu_len); > + crc[0] = CRC_MARK; > + crc[1] = hex_asc_upper_hi(valueCRC >> 8); > + crc[2] = hex_asc_upper_lo(valueCRC >> 8); > + crc[3] = hex_asc_upper_hi(valueCRC); > + crc[4] = hex_asc_upper_lo(valueCRC); > + len = min_t(int, CRC_LEN, str_max_len - str_o_len); > + memcpy(&str_o[str_o_len], crc, len); > + str_o_len += len; > + ext_o_len = ext_crc_len; > + } > + if (ext_o_len > 0) { > + memcpy(&str_o[str_o_len], ext, ext_o_len); > + str_o_len += ext_o_len; > + } > } > > return str_o_len; Honza -- Jan Kara <j...@suse.com> SUSE Labs, CR -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/