Module Name:    src
Committed By:   mlelstv
Date:           Sat Jan 30 09:59:27 UTC 2016

Modified Files:
        src/sbin/mount_msdos: mount_msdos.8 mount_msdos.c
        src/sys/fs/msdosfs: direntry.h msdosfs_conv.c msdosfs_lookup.c
            msdosfs_vnops.c msdosfsmount.h
        src/usr.sbin/makefs: msdos.c
        src/usr.sbin/makefs/msdos: msdosfs_vfsops.c msdosfs_vnops.c

Log Message:
Add support to msdosfs and makefs to generate correct Unicode (UCS-2) directory
entries from UTF-8 encoded file names.


To generate a diff of this commit:
cvs rdiff -u -r1.36 -r1.37 src/sbin/mount_msdos/mount_msdos.8
cvs rdiff -u -r1.47 -r1.48 src/sbin/mount_msdos/mount_msdos.c
cvs rdiff -u -r1.9 -r1.10 src/sys/fs/msdosfs/direntry.h
cvs rdiff -u -r1.10 -r1.11 src/sys/fs/msdosfs/msdosfs_conv.c
cvs rdiff -u -r1.34 -r1.35 src/sys/fs/msdosfs/msdosfs_lookup.c
cvs rdiff -u -r1.93 -r1.94 src/sys/fs/msdosfs/msdosfs_vnops.c
cvs rdiff -u -r1.20 -r1.21 src/sys/fs/msdosfs/msdosfsmount.h
cvs rdiff -u -r1.15 -r1.16 src/usr.sbin/makefs/msdos.c
cvs rdiff -u -r1.9 -r1.10 src/usr.sbin/makefs/msdos/msdosfs_vfsops.c
cvs rdiff -u -r1.16 -r1.17 src/usr.sbin/makefs/msdos/msdosfs_vnops.c

Please note that diffs are not public domain; they are subject to the
copyright notices on the relevant files.

Modified files:

Index: src/sbin/mount_msdos/mount_msdos.8
diff -u src/sbin/mount_msdos/mount_msdos.8:1.36 src/sbin/mount_msdos/mount_msdos.8:1.37
--- src/sbin/mount_msdos/mount_msdos.8:1.36	Fri Nov 16 15:00:18 2012
+++ src/sbin/mount_msdos/mount_msdos.8	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-.\" $NetBSD: mount_msdos.8,v 1.36 2012/11/16 15:00:18 tsutsui Exp $
+.\" $NetBSD: mount_msdos.8,v 1.37 2016/01/30 09:59:27 mlelstv Exp $
 .\"
 .\" Copyright (c) 1993, 1994 Christopher G. Demetriou
 .\" All rights reserved.
@@ -40,7 +40,7 @@
 .Nd mount an MS-DOS file system
 .Sh SYNOPSIS
 .Nm
-.Op Fl 9Gls
+.Op Fl 9GlsU
 .Op Fl g Ar gid
 .Op Fl M Ar mask
 .Op Fl m Ar mask
@@ -111,6 +111,19 @@ is the default.
 Otherwise
 .Fl l
 is assumed.
+.It Fl U
+The MS-DOS file system stores filenames in a short
+version using 8-bit characters according to some
+character set and a long version with 16-bit Unicode
+characters.
+The default method to store encoding-agnostic UNIX filenames
+is to copy them byte-wise into both fields. This is
+transparent but generates incorrect Unicode characters
+for anything that is not ASCII. Setting the
+.Fl U
+flag interprets UNIX filenames as UTF-8 and generates
+correctly encoded long filenames. This forces
+.Fl l .
 .It Fl M Ar mask
 Specify the maximum file permissions for directories
 in the file system.

Index: src/sbin/mount_msdos/mount_msdos.c
diff -u src/sbin/mount_msdos/mount_msdos.c:1.47 src/sbin/mount_msdos/mount_msdos.c:1.48
--- src/sbin/mount_msdos/mount_msdos.c:1.47	Wed Oct  7 20:34:02 2009
+++ src/sbin/mount_msdos/mount_msdos.c	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/* $NetBSD: mount_msdos.c,v 1.47 2009/10/07 20:34:02 pooka Exp $ */
+/* $NetBSD: mount_msdos.c,v 1.48 2016/01/30 09:59:27 mlelstv Exp $ */
 
 /*
  * Copyright (c) 1994 Christopher G. Demetriou
@@ -36,7 +36,7 @@
 
 #include <sys/cdefs.h>
 #ifndef lint
-__RCSID("$NetBSD: mount_msdos.c,v 1.47 2009/10/07 20:34:02 pooka Exp $");
+__RCSID("$NetBSD: mount_msdos.c,v 1.48 2016/01/30 09:59:27 mlelstv Exp $");
 #endif /* not lint */
 
 #include <sys/param.h>
@@ -94,7 +94,7 @@ mount_msdos_parseargs(int argc, char **a
 	*mntflags = set_gid = set_uid = set_mask = set_dirmask = set_gmtoff = 0;
 	(void)memset(args, '\0', sizeof(*args));
 
-	while ((c = getopt(argc, argv, "Gsl9u:g:m:M:o:t:")) != -1) {
+	while ((c = getopt(argc, argv, "Gsl9Uu:g:m:M:o:t:")) != -1) {
 		switch (c) {
 		case 'G':
 			args->flags |= MSDOSFSMNT_GEMDOSFS;
@@ -108,6 +108,9 @@ mount_msdos_parseargs(int argc, char **a
 		case '9':
 			args->flags |= MSDOSFSMNT_NOWIN95;
 			break;
+		case 'U':
+			args->flags |= MSDOSFSMNT_UTF8;
+			break;
 		case 'u':
 			args->uid = a_uid(optarg);
 			set_uid = 1;

Index: src/sys/fs/msdosfs/direntry.h
diff -u src/sys/fs/msdosfs/direntry.h:1.9 src/sys/fs/msdosfs/direntry.h:1.10
--- src/sys/fs/msdosfs/direntry.h:1.9	Sat Jan 23 01:26:14 2016
+++ src/sys/fs/msdosfs/direntry.h	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: direntry.h,v 1.9 2016/01/23 01:26:14 dholland Exp $	*/
+/*	$NetBSD: direntry.h,v 1.10 2016/01/30 09:59:27 mlelstv Exp $	*/
 
 /*-
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
@@ -136,11 +136,12 @@ int	dos2unixfn(unsigned char dn[11], uns
 int	unix2dosfn(const unsigned char *un, unsigned char dn[12], int unlen,
 	    unsigned int gen);
 int	unix2winfn(const unsigned char *un, int unlen, struct winentry *wep,
-	    int cnt, int chksum);
+	    int cnt, int chksum, int utf8);
 int	winChkName(const unsigned char *un, int unlen, struct winentry *wep,
-	    int chksum);
-int	win2unixfn(struct winentry *wep, struct dirent *dp, int chksum);
+	    int chksum, int utf8);
+int	win2unixfn(struct winentry *wep, struct dirent *dp, int chksum,	
+	    int utf8);
 uint8_t winChksum(uint8_t *name);
-int	winSlotCnt(const unsigned char *un, int unlen);
+int	winSlotCnt(const unsigned char *un, int unlen, int utf8);
 #endif /* _KERNEL || MAKEFS */
 #endif /* _MSDOSFS_DIRENTRY_H_ */

Index: src/sys/fs/msdosfs/msdosfs_conv.c
diff -u src/sys/fs/msdosfs/msdosfs_conv.c:1.10 src/sys/fs/msdosfs/msdosfs_conv.c:1.11
--- src/sys/fs/msdosfs/msdosfs_conv.c:1.10	Mon Sep  1 09:09:47 2014
+++ src/sys/fs/msdosfs/msdosfs_conv.c	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: msdosfs_conv.c,v 1.10 2014/09/01 09:09:47 martin Exp $	*/
+/*	$NetBSD: msdosfs_conv.c,v 1.11 2016/01/30 09:59:27 mlelstv Exp $	*/
 
 /*-
  * Copyright (C) 1995, 1997 Wolfgang Solfrank.
@@ -45,6 +45,16 @@
  * any damages caused by this software.
  *
  * October 1992
+ *
+ * 
+ * Unicode 5.0 case folding taken from
+ *
+ * http://www.unicode.org/Public/5.0.0/ucd/CaseFolding.txt
+ *
+ * Unicode Character Database
+ * Copyright (c) 1991-2006 Unicode, Inc.
+ * For terms of use, see http://www.unicode.org/terms_of_use.html
+ * For documentation, see UCD.html
  */
 
 #if HAVE_NBTOOL_CONFIG_H
@@ -52,13 +62,14 @@
 #endif
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: msdosfs_conv.c,v 1.10 2014/09/01 09:09:47 martin Exp $");
+__KERNEL_RCSID(0, "$NetBSD: msdosfs_conv.c,v 1.11 2016/01/30 09:59:27 mlelstv Exp $");
 
 /*
  * System include files.
  */
 #include <sys/param.h>
 #include <sys/time.h>
+#include <sys/endian.h>
 #ifdef _KERNEL
 #include <sys/dirent.h>
 #include <sys/systm.h>
@@ -78,6 +89,22 @@ __KERNEL_RCSID(0, "$NetBSD: msdosfs_conv
 #include <fs/msdosfs/direntry.h>
 #include <fs/msdosfs/denode.h>
 
+static int invalidname(const u_int16_t *, int);
+
+static int ucs2utf8(const u_int16_t *, u_int8_t *, int);
+static int utf8ucs2(const u_int8_t *, int, u_int16_t *);
+
+static int ucs2utf8str(const u_int16_t *, int, u_int8_t *, int);
+static int utf8ucs2str(const u_int8_t *, int, u_int16_t *, int);
+static int ucs2char8str(const u_int16_t *, int, u_int8_t *, int);
+static int char8ucs2str(const u_int8_t *, int, u_int16_t *, int);
+
+static void ucs2pad(u_int16_t *, int, int);
+
+static u_int16_t ucs2fold(u_int16_t);
+static int ucs2match(u_int16_t *, u_int16_t *, int n);
+static int char8match(u_int16_t *, u_int16_t *, int n);
+
 /*
  * The number of seconds between Jan 1, 1970 and Jan 1, 1980. In that
  * interval there were 8 regular years and 2 leap years.
@@ -284,6 +311,905 @@ u2l[256] = {
 	0xf8, 0xf9, 0xfa, 0xfb, 0xfc, 0xfd, 0xfe, 0xff, /* f8-ff */
 };
 
+/* Unicode case folding for codes 0x0000..0xffff */
+static const u_int16_t
+foldmap[] = {
+	0x0041, 0x0061, /* LATIN CAPITAL LETTER A */
+	0x0042, 0x0062, /* LATIN CAPITAL LETTER B */
+	0x0043, 0x0063, /* LATIN CAPITAL LETTER C */
+	0x0044, 0x0064, /* LATIN CAPITAL LETTER D */
+	0x0045, 0x0065, /* LATIN CAPITAL LETTER E */
+	0x0046, 0x0066, /* LATIN CAPITAL LETTER F */
+	0x0047, 0x0067, /* LATIN CAPITAL LETTER G */
+	0x0048, 0x0068, /* LATIN CAPITAL LETTER H */
+	0x0049, 0x0069, /* LATIN CAPITAL LETTER I */
+	0x004A, 0x006A, /* LATIN CAPITAL LETTER J */
+	0x004B, 0x006B, /* LATIN CAPITAL LETTER K */
+	0x004C, 0x006C, /* LATIN CAPITAL LETTER L */
+	0x004D, 0x006D, /* LATIN CAPITAL LETTER M */
+	0x004E, 0x006E, /* LATIN CAPITAL LETTER N */
+	0x004F, 0x006F, /* LATIN CAPITAL LETTER O */
+	0x0050, 0x0070, /* LATIN CAPITAL LETTER P */
+	0x0051, 0x0071, /* LATIN CAPITAL LETTER Q */
+	0x0052, 0x0072, /* LATIN CAPITAL LETTER R */
+	0x0053, 0x0073, /* LATIN CAPITAL LETTER S */
+	0x0054, 0x0074, /* LATIN CAPITAL LETTER T */
+	0x0055, 0x0075, /* LATIN CAPITAL LETTER U */
+	0x0056, 0x0076, /* LATIN CAPITAL LETTER V */
+	0x0057, 0x0077, /* LATIN CAPITAL LETTER W */
+	0x0058, 0x0078, /* LATIN CAPITAL LETTER X */
+	0x0059, 0x0079, /* LATIN CAPITAL LETTER Y */
+	0x005A, 0x007A, /* LATIN CAPITAL LETTER Z */
+	0x00B5, 0x03BC, /* MICRO SIGN */
+	0x00C0, 0x00E0, /* LATIN CAPITAL LETTER A WITH GRAVE */
+	0x00C1, 0x00E1, /* LATIN CAPITAL LETTER A WITH ACUTE */
+	0x00C2, 0x00E2, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
+	0x00C3, 0x00E3, /* LATIN CAPITAL LETTER A WITH TILDE */
+	0x00C4, 0x00E4, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
+	0x00C5, 0x00E5, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
+	0x00C6, 0x00E6, /* LATIN CAPITAL LETTER AE */
+	0x00C7, 0x00E7, /* LATIN CAPITAL LETTER C WITH CEDILLA */
+	0x00C8, 0x00E8, /* LATIN CAPITAL LETTER E WITH GRAVE */
+	0x00C9, 0x00E9, /* LATIN CAPITAL LETTER E WITH ACUTE */
+	0x00CA, 0x00EA, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
+	0x00CB, 0x00EB, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
+	0x00CC, 0x00EC, /* LATIN CAPITAL LETTER I WITH GRAVE */
+	0x00CD, 0x00ED, /* LATIN CAPITAL LETTER I WITH ACUTE */
+	0x00CE, 0x00EE, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
+	0x00CF, 0x00EF, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
+	0x00D0, 0x00F0, /* LATIN CAPITAL LETTER ETH */
+	0x00D1, 0x00F1, /* LATIN CAPITAL LETTER N WITH TILDE */
+	0x00D2, 0x00F2, /* LATIN CAPITAL LETTER O WITH GRAVE */
+	0x00D3, 0x00F3, /* LATIN CAPITAL LETTER O WITH ACUTE */
+	0x00D4, 0x00F4, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
+	0x00D5, 0x00F5, /* LATIN CAPITAL LETTER O WITH TILDE */
+	0x00D6, 0x00F6, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
+	0x00D8, 0x00F8, /* LATIN CAPITAL LETTER O WITH STROKE */
+	0x00D9, 0x00F9, /* LATIN CAPITAL LETTER U WITH GRAVE */
+	0x00DA, 0x00FA, /* LATIN CAPITAL LETTER U WITH ACUTE */
+	0x00DB, 0x00FB, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
+	0x00DC, 0x00FC, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
+	0x00DD, 0x00FD, /* LATIN CAPITAL LETTER Y WITH ACUTE */
+	0x00DE, 0x00FE, /* LATIN CAPITAL LETTER THORN */
+	0x0100, 0x0101, /* LATIN CAPITAL LETTER A WITH MACRON */
+	0x0102, 0x0103, /* LATIN CAPITAL LETTER A WITH BREVE */
+	0x0104, 0x0105, /* LATIN CAPITAL LETTER A WITH OGONEK */
+	0x0106, 0x0107, /* LATIN CAPITAL LETTER C WITH ACUTE */
+	0x0108, 0x0109, /* LATIN CAPITAL LETTER C WITH CIRCUMFLEX */
+	0x010A, 0x010B, /* LATIN CAPITAL LETTER C WITH DOT ABOVE */
+	0x010C, 0x010D, /* LATIN CAPITAL LETTER C WITH CARON */
+	0x010E, 0x010F, /* LATIN CAPITAL LETTER D WITH CARON */
+	0x0110, 0x0111, /* LATIN CAPITAL LETTER D WITH STROKE */
+	0x0112, 0x0113, /* LATIN CAPITAL LETTER E WITH MACRON */
+	0x0114, 0x0115, /* LATIN CAPITAL LETTER E WITH BREVE */
+	0x0116, 0x0117, /* LATIN CAPITAL LETTER E WITH DOT ABOVE */
+	0x0118, 0x0119, /* LATIN CAPITAL LETTER E WITH OGONEK */
+	0x011A, 0x011B, /* LATIN CAPITAL LETTER E WITH CARON */
+	0x011C, 0x011D, /* LATIN CAPITAL LETTER G WITH CIRCUMFLEX */
+	0x011E, 0x011F, /* LATIN CAPITAL LETTER G WITH BREVE */
+	0x0120, 0x0121, /* LATIN CAPITAL LETTER G WITH DOT ABOVE */
+	0x0122, 0x0123, /* LATIN CAPITAL LETTER G WITH CEDILLA */
+	0x0124, 0x0125, /* LATIN CAPITAL LETTER H WITH CIRCUMFLEX */
+	0x0126, 0x0127, /* LATIN CAPITAL LETTER H WITH STROKE */
+	0x0128, 0x0129, /* LATIN CAPITAL LETTER I WITH TILDE */
+	0x012A, 0x012B, /* LATIN CAPITAL LETTER I WITH MACRON */
+	0x012C, 0x012D, /* LATIN CAPITAL LETTER I WITH BREVE */
+	0x012E, 0x012F, /* LATIN CAPITAL LETTER I WITH OGONEK */
+	0x0132, 0x0133, /* LATIN CAPITAL LIGATURE IJ */
+	0x0134, 0x0135, /* LATIN CAPITAL LETTER J WITH CIRCUMFLEX */
+	0x0136, 0x0137, /* LATIN CAPITAL LETTER K WITH CEDILLA */
+	0x0139, 0x013A, /* LATIN CAPITAL LETTER L WITH ACUTE */
+	0x013B, 0x013C, /* LATIN CAPITAL LETTER L WITH CEDILLA */
+	0x013D, 0x013E, /* LATIN CAPITAL LETTER L WITH CARON */
+	0x013F, 0x0140, /* LATIN CAPITAL LETTER L WITH MIDDLE DOT */
+	0x0141, 0x0142, /* LATIN CAPITAL LETTER L WITH STROKE */
+	0x0143, 0x0144, /* LATIN CAPITAL LETTER N WITH ACUTE */
+	0x0145, 0x0146, /* LATIN CAPITAL LETTER N WITH CEDILLA */
+	0x0147, 0x0148, /* LATIN CAPITAL LETTER N WITH CARON */
+	0x014A, 0x014B, /* LATIN CAPITAL LETTER ENG */
+	0x014C, 0x014D, /* LATIN CAPITAL LETTER O WITH MACRON */
+	0x014E, 0x014F, /* LATIN CAPITAL LETTER O WITH BREVE */
+	0x0150, 0x0151, /* LATIN CAPITAL LETTER O WITH DOUBLE ACUTE */
+	0x0152, 0x0153, /* LATIN CAPITAL LIGATURE OE */
+	0x0154, 0x0155, /* LATIN CAPITAL LETTER R WITH ACUTE */
+	0x0156, 0x0157, /* LATIN CAPITAL LETTER R WITH CEDILLA */
+	0x0158, 0x0159, /* LATIN CAPITAL LETTER R WITH CARON */
+	0x015A, 0x015B, /* LATIN CAPITAL LETTER S WITH ACUTE */
+	0x015C, 0x015D, /* LATIN CAPITAL LETTER S WITH CIRCUMFLEX */
+	0x015E, 0x015F, /* LATIN CAPITAL LETTER S WITH CEDILLA */
+	0x0160, 0x0161, /* LATIN CAPITAL LETTER S WITH CARON */
+	0x0162, 0x0163, /* LATIN CAPITAL LETTER T WITH CEDILLA */
+	0x0164, 0x0165, /* LATIN CAPITAL LETTER T WITH CARON */
+	0x0166, 0x0167, /* LATIN CAPITAL LETTER T WITH STROKE */
+	0x0168, 0x0169, /* LATIN CAPITAL LETTER U WITH TILDE */
+	0x016A, 0x016B, /* LATIN CAPITAL LETTER U WITH MACRON */
+	0x016C, 0x016D, /* LATIN CAPITAL LETTER U WITH BREVE */
+	0x016E, 0x016F, /* LATIN CAPITAL LETTER U WITH RING ABOVE */
+	0x0170, 0x0171, /* LATIN CAPITAL LETTER U WITH DOUBLE ACUTE */
+	0x0172, 0x0173, /* LATIN CAPITAL LETTER U WITH OGONEK */
+	0x0174, 0x0175, /* LATIN CAPITAL LETTER W WITH CIRCUMFLEX */
+	0x0176, 0x0177, /* LATIN CAPITAL LETTER Y WITH CIRCUMFLEX */
+	0x0178, 0x00FF, /* LATIN CAPITAL LETTER Y WITH DIAERESIS */
+	0x0179, 0x017A, /* LATIN CAPITAL LETTER Z WITH ACUTE */
+	0x017B, 0x017C, /* LATIN CAPITAL LETTER Z WITH DOT ABOVE */
+	0x017D, 0x017E, /* LATIN CAPITAL LETTER Z WITH CARON */
+	0x017F, 0x0073, /* LATIN SMALL LETTER LONG S */
+	0x0181, 0x0253, /* LATIN CAPITAL LETTER B WITH HOOK */
+	0x0182, 0x0183, /* LATIN CAPITAL LETTER B WITH TOPBAR */
+	0x0184, 0x0185, /* LATIN CAPITAL LETTER TONE SIX */
+	0x0186, 0x0254, /* LATIN CAPITAL LETTER OPEN O */
+	0x0187, 0x0188, /* LATIN CAPITAL LETTER C WITH HOOK */
+	0x0189, 0x0256, /* LATIN CAPITAL LETTER AFRICAN D */
+	0x018A, 0x0257, /* LATIN CAPITAL LETTER D WITH HOOK */
+	0x018B, 0x018C, /* LATIN CAPITAL LETTER D WITH TOPBAR */
+	0x018E, 0x01DD, /* LATIN CAPITAL LETTER REVERSED E */
+	0x018F, 0x0259, /* LATIN CAPITAL LETTER SCHWA */
+	0x0190, 0x025B, /* LATIN CAPITAL LETTER OPEN E */
+	0x0191, 0x0192, /* LATIN CAPITAL LETTER F WITH HOOK */
+	0x0193, 0x0260, /* LATIN CAPITAL LETTER G WITH HOOK */
+	0x0194, 0x0263, /* LATIN CAPITAL LETTER GAMMA */
+	0x0196, 0x0269, /* LATIN CAPITAL LETTER IOTA */
+	0x0197, 0x0268, /* LATIN CAPITAL LETTER I WITH STROKE */
+	0x0198, 0x0199, /* LATIN CAPITAL LETTER K WITH HOOK */
+	0x019C, 0x026F, /* LATIN CAPITAL LETTER TURNED M */
+	0x019D, 0x0272, /* LATIN CAPITAL LETTER N WITH LEFT HOOK */
+	0x019F, 0x0275, /* LATIN CAPITAL LETTER O WITH MIDDLE TILDE */
+	0x01A0, 0x01A1, /* LATIN CAPITAL LETTER O WITH HORN */
+	0x01A2, 0x01A3, /* LATIN CAPITAL LETTER OI */
+	0x01A4, 0x01A5, /* LATIN CAPITAL LETTER P WITH HOOK */
+	0x01A6, 0x0280, /* LATIN LETTER YR */
+	0x01A7, 0x01A8, /* LATIN CAPITAL LETTER TONE TWO */
+	0x01A9, 0x0283, /* LATIN CAPITAL LETTER ESH */
+	0x01AC, 0x01AD, /* LATIN CAPITAL LETTER T WITH HOOK */
+	0x01AE, 0x0288, /* LATIN CAPITAL LETTER T WITH RETROFLEX HOOK */
+	0x01AF, 0x01B0, /* LATIN CAPITAL LETTER U WITH HORN */
+	0x01B1, 0x028A, /* LATIN CAPITAL LETTER UPSILON */
+	0x01B2, 0x028B, /* LATIN CAPITAL LETTER V WITH HOOK */
+	0x01B3, 0x01B4, /* LATIN CAPITAL LETTER Y WITH HOOK */
+	0x01B5, 0x01B6, /* LATIN CAPITAL LETTER Z WITH STROKE */
+	0x01B7, 0x0292, /* LATIN CAPITAL LETTER EZH */
+	0x01B8, 0x01B9, /* LATIN CAPITAL LETTER EZH REVERSED */
+	0x01BC, 0x01BD, /* LATIN CAPITAL LETTER TONE FIVE */
+	0x01C4, 0x01C6, /* LATIN CAPITAL LETTER DZ WITH CARON */
+	0x01C5, 0x01C6, /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON */
+	0x01C7, 0x01C9, /* LATIN CAPITAL LETTER LJ */
+	0x01C8, 0x01C9, /* LATIN CAPITAL LETTER L WITH SMALL LETTER J */
+	0x01CA, 0x01CC, /* LATIN CAPITAL LETTER NJ */
+	0x01CB, 0x01CC, /* LATIN CAPITAL LETTER N WITH SMALL LETTER J */
+	0x01CD, 0x01CE, /* LATIN CAPITAL LETTER A WITH CARON */
+	0x01CF, 0x01D0, /* LATIN CAPITAL LETTER I WITH CARON */
+	0x01D1, 0x01D2, /* LATIN CAPITAL LETTER O WITH CARON */
+	0x01D3, 0x01D4, /* LATIN CAPITAL LETTER U WITH CARON */
+	0x01D5, 0x01D6, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON */
+	0x01D7, 0x01D8, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE */
+	0x01D9, 0x01DA, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON */
+	0x01DB, 0x01DC, /* LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE */
+	0x01DE, 0x01DF, /* LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON */
+	0x01E0, 0x01E1, /* LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON */
+	0x01E2, 0x01E3, /* LATIN CAPITAL LETTER AE WITH MACRON */
+	0x01E4, 0x01E5, /* LATIN CAPITAL LETTER G WITH STROKE */
+	0x01E6, 0x01E7, /* LATIN CAPITAL LETTER G WITH CARON */
+	0x01E8, 0x01E9, /* LATIN CAPITAL LETTER K WITH CARON */
+	0x01EA, 0x01EB, /* LATIN CAPITAL LETTER O WITH OGONEK */
+	0x01EC, 0x01ED, /* LATIN CAPITAL LETTER O WITH OGONEK AND MACRON */
+	0x01EE, 0x01EF, /* LATIN CAPITAL LETTER EZH WITH CARON */
+	0x01F1, 0x01F3, /* LATIN CAPITAL LETTER DZ */
+	0x01F2, 0x01F3, /* LATIN CAPITAL LETTER D WITH SMALL LETTER Z */
+	0x01F4, 0x01F5, /* LATIN CAPITAL LETTER G WITH ACUTE */
+	0x01F6, 0x0195, /* LATIN CAPITAL LETTER HWAIR */
+	0x01F7, 0x01BF, /* LATIN CAPITAL LETTER WYNN */
+	0x01F8, 0x01F9, /* LATIN CAPITAL LETTER N WITH GRAVE */
+	0x01FA, 0x01FB, /* LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE */
+	0x01FC, 0x01FD, /* LATIN CAPITAL LETTER AE WITH ACUTE */
+	0x01FE, 0x01FF, /* LATIN CAPITAL LETTER O WITH STROKE AND ACUTE */
+	0x0200, 0x0201, /* LATIN CAPITAL LETTER A WITH DOUBLE GRAVE */
+	0x0202, 0x0203, /* LATIN CAPITAL LETTER A WITH INVERTED BREVE */
+	0x0204, 0x0205, /* LATIN CAPITAL LETTER E WITH DOUBLE GRAVE */
+	0x0206, 0x0207, /* LATIN CAPITAL LETTER E WITH INVERTED BREVE */
+	0x0208, 0x0209, /* LATIN CAPITAL LETTER I WITH DOUBLE GRAVE */
+	0x020A, 0x020B, /* LATIN CAPITAL LETTER I WITH INVERTED BREVE */
+	0x020C, 0x020D, /* LATIN CAPITAL LETTER O WITH DOUBLE GRAVE */
+	0x020E, 0x020F, /* LATIN CAPITAL LETTER O WITH INVERTED BREVE */
+	0x0210, 0x0211, /* LATIN CAPITAL LETTER R WITH DOUBLE GRAVE */
+	0x0212, 0x0213, /* LATIN CAPITAL LETTER R WITH INVERTED BREVE */
+	0x0214, 0x0215, /* LATIN CAPITAL LETTER U WITH DOUBLE GRAVE */
+	0x0216, 0x0217, /* LATIN CAPITAL LETTER U WITH INVERTED BREVE */
+	0x0218, 0x0219, /* LATIN CAPITAL LETTER S WITH COMMA BELOW */
+	0x021A, 0x021B, /* LATIN CAPITAL LETTER T WITH COMMA BELOW */
+	0x021C, 0x021D, /* LATIN CAPITAL LETTER YOGH */
+	0x021E, 0x021F, /* LATIN CAPITAL LETTER H WITH CARON */
+	0x0220, 0x019E, /* LATIN CAPITAL LETTER N WITH LONG RIGHT LEG */
+	0x0222, 0x0223, /* LATIN CAPITAL LETTER OU */
+	0x0224, 0x0225, /* LATIN CAPITAL LETTER Z WITH HOOK */
+	0x0226, 0x0227, /* LATIN CAPITAL LETTER A WITH DOT ABOVE */
+	0x0228, 0x0229, /* LATIN CAPITAL LETTER E WITH CEDILLA */
+	0x022A, 0x022B, /* LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON */
+	0x022C, 0x022D, /* LATIN CAPITAL LETTER O WITH TILDE AND MACRON */
+	0x022E, 0x022F, /* LATIN CAPITAL LETTER O WITH DOT ABOVE */
+	0x0230, 0x0231, /* LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON */
+	0x0232, 0x0233, /* LATIN CAPITAL LETTER Y WITH MACRON */
+	0x023A, 0x2C65, /* LATIN CAPITAL LETTER A WITH STROKE */
+	0x023B, 0x023C, /* LATIN CAPITAL LETTER C WITH STROKE */
+	0x023D, 0x019A, /* LATIN CAPITAL LETTER L WITH BAR */
+	0x023E, 0x2C66, /* LATIN CAPITAL LETTER T WITH DIAGONAL STROKE */
+	0x0241, 0x0242, /* LATIN CAPITAL LETTER GLOTTAL STOP */
+	0x0243, 0x0180, /* LATIN CAPITAL LETTER B WITH STROKE */
+	0x0244, 0x0289, /* LATIN CAPITAL LETTER U BAR */
+	0x0245, 0x028C, /* LATIN CAPITAL LETTER TURNED V */
+	0x0246, 0x0247, /* LATIN CAPITAL LETTER E WITH STROKE */
+	0x0248, 0x0249, /* LATIN CAPITAL LETTER J WITH STROKE */
+	0x024A, 0x024B, /* LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL */
+	0x024C, 0x024D, /* LATIN CAPITAL LETTER R WITH STROKE */
+	0x024E, 0x024F, /* LATIN CAPITAL LETTER Y WITH STROKE */
+	0x0345, 0x03B9, /* COMBINING GREEK YPOGEGRAMMENI */
+	0x0386, 0x03AC, /* GREEK CAPITAL LETTER ALPHA WITH TONOS */
+	0x0388, 0x03AD, /* GREEK CAPITAL LETTER EPSILON WITH TONOS */
+	0x0389, 0x03AE, /* GREEK CAPITAL LETTER ETA WITH TONOS */
+	0x038A, 0x03AF, /* GREEK CAPITAL LETTER IOTA WITH TONOS */
+	0x038C, 0x03CC, /* GREEK CAPITAL LETTER OMICRON WITH TONOS */
+	0x038E, 0x03CD, /* GREEK CAPITAL LETTER UPSILON WITH TONOS */
+	0x038F, 0x03CE, /* GREEK CAPITAL LETTER OMEGA WITH TONOS */
+	0x0391, 0x03B1, /* GREEK CAPITAL LETTER ALPHA */
+	0x0392, 0x03B2, /* GREEK CAPITAL LETTER BETA */
+	0x0393, 0x03B3, /* GREEK CAPITAL LETTER GAMMA */
+	0x0394, 0x03B4, /* GREEK CAPITAL LETTER DELTA */
+	0x0395, 0x03B5, /* GREEK CAPITAL LETTER EPSILON */
+	0x0396, 0x03B6, /* GREEK CAPITAL LETTER ZETA */
+	0x0397, 0x03B7, /* GREEK CAPITAL LETTER ETA */
+	0x0398, 0x03B8, /* GREEK CAPITAL LETTER THETA */
+	0x0399, 0x03B9, /* GREEK CAPITAL LETTER IOTA */
+	0x039A, 0x03BA, /* GREEK CAPITAL LETTER KAPPA */
+	0x039B, 0x03BB, /* GREEK CAPITAL LETTER LAMDA */
+	0x039C, 0x03BC, /* GREEK CAPITAL LETTER MU */
+	0x039D, 0x03BD, /* GREEK CAPITAL LETTER NU */
+	0x039E, 0x03BE, /* GREEK CAPITAL LETTER XI */
+	0x039F, 0x03BF, /* GREEK CAPITAL LETTER OMICRON */
+	0x03A0, 0x03C0, /* GREEK CAPITAL LETTER PI */
+	0x03A1, 0x03C1, /* GREEK CAPITAL LETTER RHO */
+	0x03A3, 0x03C3, /* GREEK CAPITAL LETTER SIGMA */
+	0x03A4, 0x03C4, /* GREEK CAPITAL LETTER TAU */
+	0x03A5, 0x03C5, /* GREEK CAPITAL LETTER UPSILON */
+	0x03A6, 0x03C6, /* GREEK CAPITAL LETTER PHI */
+	0x03A7, 0x03C7, /* GREEK CAPITAL LETTER CHI */
+	0x03A8, 0x03C8, /* GREEK CAPITAL LETTER PSI */
+	0x03A9, 0x03C9, /* GREEK CAPITAL LETTER OMEGA */
+	0x03AA, 0x03CA, /* GREEK CAPITAL LETTER IOTA WITH DIALYTIKA */
+	0x03AB, 0x03CB, /* GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA */
+	0x03C2, 0x03C3, /* GREEK SMALL LETTER FINAL SIGMA */
+	0x03D0, 0x03B2, /* GREEK BETA SYMBOL */
+	0x03D1, 0x03B8, /* GREEK THETA SYMBOL */
+	0x03D5, 0x03C6, /* GREEK PHI SYMBOL */
+	0x03D6, 0x03C0, /* GREEK PI SYMBOL */
+	0x03D8, 0x03D9, /* GREEK LETTER ARCHAIC KOPPA */
+	0x03DA, 0x03DB, /* GREEK LETTER STIGMA */
+	0x03DC, 0x03DD, /* GREEK LETTER DIGAMMA */
+	0x03DE, 0x03DF, /* GREEK LETTER KOPPA */
+	0x03E0, 0x03E1, /* GREEK LETTER SAMPI */
+	0x03E2, 0x03E3, /* COPTIC CAPITAL LETTER SHEI */
+	0x03E4, 0x03E5, /* COPTIC CAPITAL LETTER FEI */
+	0x03E6, 0x03E7, /* COPTIC CAPITAL LETTER KHEI */
+	0x03E8, 0x03E9, /* COPTIC CAPITAL LETTER HORI */
+	0x03EA, 0x03EB, /* COPTIC CAPITAL LETTER GANGIA */
+	0x03EC, 0x03ED, /* COPTIC CAPITAL LETTER SHIMA */
+	0x03EE, 0x03EF, /* COPTIC CAPITAL LETTER DEI */
+	0x03F0, 0x03BA, /* GREEK KAPPA SYMBOL */
+	0x03F1, 0x03C1, /* GREEK RHO SYMBOL */
+	0x03F4, 0x03B8, /* GREEK CAPITAL THETA SYMBOL */
+	0x03F5, 0x03B5, /* GREEK LUNATE EPSILON SYMBOL */
+	0x03F7, 0x03F8, /* GREEK CAPITAL LETTER SHO */
+	0x03F9, 0x03F2, /* GREEK CAPITAL LUNATE SIGMA SYMBOL */
+	0x03FA, 0x03FB, /* GREEK CAPITAL LETTER SAN */
+	0x03FD, 0x037B, /* GREEK CAPITAL REVERSED LUNATE SIGMA SYMBOL */
+	0x03FE, 0x037C, /* GREEK CAPITAL DOTTED LUNATE SIGMA SYMBOL */
+	0x03FF, 0x037D, /* GREEK CAPITAL REVERSED DOTTED LUNATE SIGMA SYMBOL */
+	0x0400, 0x0450, /* CYRILLIC CAPITAL LETTER IE WITH GRAVE */
+	0x0401, 0x0451, /* CYRILLIC CAPITAL LETTER IO */
+	0x0402, 0x0452, /* CYRILLIC CAPITAL LETTER DJE */
+	0x0403, 0x0453, /* CYRILLIC CAPITAL LETTER GJE */
+	0x0404, 0x0454, /* CYRILLIC CAPITAL LETTER UKRAINIAN IE */
+	0x0405, 0x0455, /* CYRILLIC CAPITAL LETTER DZE */
+	0x0406, 0x0456, /* CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I */
+	0x0407, 0x0457, /* CYRILLIC CAPITAL LETTER YI */
+	0x0408, 0x0458, /* CYRILLIC CAPITAL LETTER JE */
+	0x0409, 0x0459, /* CYRILLIC CAPITAL LETTER LJE */
+	0x040A, 0x045A, /* CYRILLIC CAPITAL LETTER NJE */
+	0x040B, 0x045B, /* CYRILLIC CAPITAL LETTER TSHE */
+	0x040C, 0x045C, /* CYRILLIC CAPITAL LETTER KJE */
+	0x040D, 0x045D, /* CYRILLIC CAPITAL LETTER I WITH GRAVE */
+	0x040E, 0x045E, /* CYRILLIC CAPITAL LETTER SHORT U */
+	0x040F, 0x045F, /* CYRILLIC CAPITAL LETTER DZHE */
+	0x0410, 0x0430, /* CYRILLIC CAPITAL LETTER A */
+	0x0411, 0x0431, /* CYRILLIC CAPITAL LETTER BE */
+	0x0412, 0x0432, /* CYRILLIC CAPITAL LETTER VE */
+	0x0413, 0x0433, /* CYRILLIC CAPITAL LETTER GHE */
+	0x0414, 0x0434, /* CYRILLIC CAPITAL LETTER DE */
+	0x0415, 0x0435, /* CYRILLIC CAPITAL LETTER IE */
+	0x0416, 0x0436, /* CYRILLIC CAPITAL LETTER ZHE */
+	0x0417, 0x0437, /* CYRILLIC CAPITAL LETTER ZE */
+	0x0418, 0x0438, /* CYRILLIC CAPITAL LETTER I */
+	0x0419, 0x0439, /* CYRILLIC CAPITAL LETTER SHORT I */
+	0x041A, 0x043A, /* CYRILLIC CAPITAL LETTER KA */
+	0x041B, 0x043B, /* CYRILLIC CAPITAL LETTER EL */
+	0x041C, 0x043C, /* CYRILLIC CAPITAL LETTER EM */
+	0x041D, 0x043D, /* CYRILLIC CAPITAL LETTER EN */
+	0x041E, 0x043E, /* CYRILLIC CAPITAL LETTER O */
+	0x041F, 0x043F, /* CYRILLIC CAPITAL LETTER PE */
+	0x0420, 0x0440, /* CYRILLIC CAPITAL LETTER ER */
+	0x0421, 0x0441, /* CYRILLIC CAPITAL LETTER ES */
+	0x0422, 0x0442, /* CYRILLIC CAPITAL LETTER TE */
+	0x0423, 0x0443, /* CYRILLIC CAPITAL LETTER U */
+	0x0424, 0x0444, /* CYRILLIC CAPITAL LETTER EF */
+	0x0425, 0x0445, /* CYRILLIC CAPITAL LETTER HA */
+	0x0426, 0x0446, /* CYRILLIC CAPITAL LETTER TSE */
+	0x0427, 0x0447, /* CYRILLIC CAPITAL LETTER CHE */
+	0x0428, 0x0448, /* CYRILLIC CAPITAL LETTER SHA */
+	0x0429, 0x0449, /* CYRILLIC CAPITAL LETTER SHCHA */
+	0x042A, 0x044A, /* CYRILLIC CAPITAL LETTER HARD SIGN */
+	0x042B, 0x044B, /* CYRILLIC CAPITAL LETTER YERU */
+	0x042C, 0x044C, /* CYRILLIC CAPITAL LETTER SOFT SIGN */
+	0x042D, 0x044D, /* CYRILLIC CAPITAL LETTER E */
+	0x042E, 0x044E, /* CYRILLIC CAPITAL LETTER YU */
+	0x042F, 0x044F, /* CYRILLIC CAPITAL LETTER YA */
+	0x0460, 0x0461, /* CYRILLIC CAPITAL LETTER OMEGA */
+	0x0462, 0x0463, /* CYRILLIC CAPITAL LETTER YAT */
+	0x0464, 0x0465, /* CYRILLIC CAPITAL LETTER IOTIFIED E */
+	0x0466, 0x0467, /* CYRILLIC CAPITAL LETTER LITTLE YUS */
+	0x0468, 0x0469, /* CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS */
+	0x046A, 0x046B, /* CYRILLIC CAPITAL LETTER BIG YUS */
+	0x046C, 0x046D, /* CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS */
+	0x046E, 0x046F, /* CYRILLIC CAPITAL LETTER KSI */
+	0x0470, 0x0471, /* CYRILLIC CAPITAL LETTER PSI */
+	0x0472, 0x0473, /* CYRILLIC CAPITAL LETTER FITA */
+	0x0474, 0x0475, /* CYRILLIC CAPITAL LETTER IZHITSA */
+	0x0476, 0x0477, /* CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT */
+	0x0478, 0x0479, /* CYRILLIC CAPITAL LETTER UK */
+	0x047A, 0x047B, /* CYRILLIC CAPITAL LETTER ROUND OMEGA */
+	0x047C, 0x047D, /* CYRILLIC CAPITAL LETTER OMEGA WITH TITLO */
+	0x047E, 0x047F, /* CYRILLIC CAPITAL LETTER OT */
+	0x0480, 0x0481, /* CYRILLIC CAPITAL LETTER KOPPA */
+	0x048A, 0x048B, /* CYRILLIC CAPITAL LETTER SHORT I WITH TAIL */
+	0x048C, 0x048D, /* CYRILLIC CAPITAL LETTER SEMISOFT SIGN */
+	0x048E, 0x048F, /* CYRILLIC CAPITAL LETTER ER WITH TICK */
+	0x0490, 0x0491, /* CYRILLIC CAPITAL LETTER GHE WITH UPTURN */
+	0x0492, 0x0493, /* CYRILLIC CAPITAL LETTER GHE WITH STROKE */
+	0x0494, 0x0495, /* CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK */
+	0x0496, 0x0497, /* CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER */
+	0x0498, 0x0499, /* CYRILLIC CAPITAL LETTER ZE WITH DESCENDER */
+	0x049A, 0x049B, /* CYRILLIC CAPITAL LETTER KA WITH DESCENDER */
+	0x049C, 0x049D, /* CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE */
+	0x049E, 0x049F, /* CYRILLIC CAPITAL LETTER KA WITH STROKE */
+	0x04A0, 0x04A1, /* CYRILLIC CAPITAL LETTER BASHKIR KA */
+	0x04A2, 0x04A3, /* CYRILLIC CAPITAL LETTER EN WITH DESCENDER */
+	0x04A4, 0x04A5, /* CYRILLIC CAPITAL LIGATURE EN GHE */
+	0x04A6, 0x04A7, /* CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK */
+	0x04A8, 0x04A9, /* CYRILLIC CAPITAL LETTER ABKHASIAN HA */
+	0x04AA, 0x04AB, /* CYRILLIC CAPITAL LETTER ES WITH DESCENDER */
+	0x04AC, 0x04AD, /* CYRILLIC CAPITAL LETTER TE WITH DESCENDER */
+	0x04AE, 0x04AF, /* CYRILLIC CAPITAL LETTER STRAIGHT U */
+	0x04B0, 0x04B1, /* CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE */
+	0x04B2, 0x04B3, /* CYRILLIC CAPITAL LETTER HA WITH DESCENDER */
+	0x04B4, 0x04B5, /* CYRILLIC CAPITAL LIGATURE TE TSE */
+	0x04B6, 0x04B7, /* CYRILLIC CAPITAL LETTER CHE WITH DESCENDER */
+	0x04B8, 0x04B9, /* CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE */
+	0x04BA, 0x04BB, /* CYRILLIC CAPITAL LETTER SHHA */
+	0x04BC, 0x04BD, /* CYRILLIC CAPITAL LETTER ABKHASIAN CHE */
+	0x04BE, 0x04BF, /* CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER */
+	0x04C0, 0x04CF, /* CYRILLIC LETTER PALOCHKA */
+	0x04C1, 0x04C2, /* CYRILLIC CAPITAL LETTER ZHE WITH BREVE */
+	0x04C3, 0x04C4, /* CYRILLIC CAPITAL LETTER KA WITH HOOK */
+	0x04C5, 0x04C6, /* CYRILLIC CAPITAL LETTER EL WITH TAIL */
+	0x04C7, 0x04C8, /* CYRILLIC CAPITAL LETTER EN WITH HOOK */
+	0x04C9, 0x04CA, /* CYRILLIC CAPITAL LETTER EN WITH TAIL */
+	0x04CB, 0x04CC, /* CYRILLIC CAPITAL LETTER KHAKASSIAN CHE */
+	0x04CD, 0x04CE, /* CYRILLIC CAPITAL LETTER EM WITH TAIL */
+	0x04D0, 0x04D1, /* CYRILLIC CAPITAL LETTER A WITH BREVE */
+	0x04D2, 0x04D3, /* CYRILLIC CAPITAL LETTER A WITH DIAERESIS */
+	0x04D4, 0x04D5, /* CYRILLIC CAPITAL LIGATURE A IE */
+	0x04D6, 0x04D7, /* CYRILLIC CAPITAL LETTER IE WITH BREVE */
+	0x04D8, 0x04D9, /* CYRILLIC CAPITAL LETTER SCHWA */
+	0x04DA, 0x04DB, /* CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS */
+	0x04DC, 0x04DD, /* CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS */
+	0x04DE, 0x04DF, /* CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS */
+	0x04E0, 0x04E1, /* CYRILLIC CAPITAL LETTER ABKHASIAN DZE */
+	0x04E2, 0x04E3, /* CYRILLIC CAPITAL LETTER I WITH MACRON */
+	0x04E4, 0x04E5, /* CYRILLIC CAPITAL LETTER I WITH DIAERESIS */
+	0x04E6, 0x04E7, /* CYRILLIC CAPITAL LETTER O WITH DIAERESIS */
+	0x04E8, 0x04E9, /* CYRILLIC CAPITAL LETTER BARRED O */
+	0x04EA, 0x04EB, /* CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS */
+	0x04EC, 0x04ED, /* CYRILLIC CAPITAL LETTER E WITH DIAERESIS */
+	0x04EE, 0x04EF, /* CYRILLIC CAPITAL LETTER U WITH MACRON */
+	0x04F0, 0x04F1, /* CYRILLIC CAPITAL LETTER U WITH DIAERESIS */
+	0x04F2, 0x04F3, /* CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE */
+	0x04F4, 0x04F5, /* CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS */
+	0x04F6, 0x04F7, /* CYRILLIC CAPITAL LETTER GHE WITH DESCENDER */
+	0x04F8, 0x04F9, /* CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS */
+	0x04FA, 0x04FB, /* CYRILLIC CAPITAL LETTER GHE WITH STROKE AND HOOK */
+	0x04FC, 0x04FD, /* CYRILLIC CAPITAL LETTER HA WITH HOOK */
+	0x04FE, 0x04FF, /* CYRILLIC CAPITAL LETTER HA WITH STROKE */
+	0x0500, 0x0501, /* CYRILLIC CAPITAL LETTER KOMI DE */
+	0x0502, 0x0503, /* CYRILLIC CAPITAL LETTER KOMI DJE */
+	0x0504, 0x0505, /* CYRILLIC CAPITAL LETTER KOMI ZJE */
+	0x0506, 0x0507, /* CYRILLIC CAPITAL LETTER KOMI DZJE */
+	0x0508, 0x0509, /* CYRILLIC CAPITAL LETTER KOMI LJE */
+	0x050A, 0x050B, /* CYRILLIC CAPITAL LETTER KOMI NJE */
+	0x050C, 0x050D, /* CYRILLIC CAPITAL LETTER KOMI SJE */
+	0x050E, 0x050F, /* CYRILLIC CAPITAL LETTER KOMI TJE */
+	0x0510, 0x0511, /* CYRILLIC CAPITAL LETTER REVERSED ZE */
+	0x0512, 0x0513, /* CYRILLIC CAPITAL LETTER EL WITH HOOK */
+	0x0531, 0x0561, /* ARMENIAN CAPITAL LETTER AYB */
+	0x0532, 0x0562, /* ARMENIAN CAPITAL LETTER BEN */
+	0x0533, 0x0563, /* ARMENIAN CAPITAL LETTER GIM */
+	0x0534, 0x0564, /* ARMENIAN CAPITAL LETTER DA */
+	0x0535, 0x0565, /* ARMENIAN CAPITAL LETTER ECH */
+	0x0536, 0x0566, /* ARMENIAN CAPITAL LETTER ZA */
+	0x0537, 0x0567, /* ARMENIAN CAPITAL LETTER EH */
+	0x0538, 0x0568, /* ARMENIAN CAPITAL LETTER ET */
+	0x0539, 0x0569, /* ARMENIAN CAPITAL LETTER TO */
+	0x053A, 0x056A, /* ARMENIAN CAPITAL LETTER ZHE */
+	0x053B, 0x056B, /* ARMENIAN CAPITAL LETTER INI */
+	0x053C, 0x056C, /* ARMENIAN CAPITAL LETTER LIWN */
+	0x053D, 0x056D, /* ARMENIAN CAPITAL LETTER XEH */
+	0x053E, 0x056E, /* ARMENIAN CAPITAL LETTER CA */
+	0x053F, 0x056F, /* ARMENIAN CAPITAL LETTER KEN */
+	0x0540, 0x0570, /* ARMENIAN CAPITAL LETTER HO */
+	0x0541, 0x0571, /* ARMENIAN CAPITAL LETTER JA */
+	0x0542, 0x0572, /* ARMENIAN CAPITAL LETTER GHAD */
+	0x0543, 0x0573, /* ARMENIAN CAPITAL LETTER CHEH */
+	0x0544, 0x0574, /* ARMENIAN CAPITAL LETTER MEN */
+	0x0545, 0x0575, /* ARMENIAN CAPITAL LETTER YI */
+	0x0546, 0x0576, /* ARMENIAN CAPITAL LETTER NOW */
+	0x0547, 0x0577, /* ARMENIAN CAPITAL LETTER SHA */
+	0x0548, 0x0578, /* ARMENIAN CAPITAL LETTER VO */
+	0x0549, 0x0579, /* ARMENIAN CAPITAL LETTER CHA */
+	0x054A, 0x057A, /* ARMENIAN CAPITAL LETTER PEH */
+	0x054B, 0x057B, /* ARMENIAN CAPITAL LETTER JHEH */
+	0x054C, 0x057C, /* ARMENIAN CAPITAL LETTER RA */
+	0x054D, 0x057D, /* ARMENIAN CAPITAL LETTER SEH */
+	0x054E, 0x057E, /* ARMENIAN CAPITAL LETTER VEW */
+	0x054F, 0x057F, /* ARMENIAN CAPITAL LETTER TIWN */
+	0x0550, 0x0580, /* ARMENIAN CAPITAL LETTER REH */
+	0x0551, 0x0581, /* ARMENIAN CAPITAL LETTER CO */
+	0x0552, 0x0582, /* ARMENIAN CAPITAL LETTER YIWN */
+	0x0553, 0x0583, /* ARMENIAN CAPITAL LETTER PIWR */
+	0x0554, 0x0584, /* ARMENIAN CAPITAL LETTER KEH */
+	0x0555, 0x0585, /* ARMENIAN CAPITAL LETTER OH */
+	0x0556, 0x0586, /* ARMENIAN CAPITAL LETTER FEH */
+	0x10A0, 0x2D00, /* GEORGIAN CAPITAL LETTER AN */
+	0x10A1, 0x2D01, /* GEORGIAN CAPITAL LETTER BAN */
+	0x10A2, 0x2D02, /* GEORGIAN CAPITAL LETTER GAN */
+	0x10A3, 0x2D03, /* GEORGIAN CAPITAL LETTER DON */
+	0x10A4, 0x2D04, /* GEORGIAN CAPITAL LETTER EN */
+	0x10A5, 0x2D05, /* GEORGIAN CAPITAL LETTER VIN */
+	0x10A6, 0x2D06, /* GEORGIAN CAPITAL LETTER ZEN */
+	0x10A7, 0x2D07, /* GEORGIAN CAPITAL LETTER TAN */
+	0x10A8, 0x2D08, /* GEORGIAN CAPITAL LETTER IN */
+	0x10A9, 0x2D09, /* GEORGIAN CAPITAL LETTER KAN */
+	0x10AA, 0x2D0A, /* GEORGIAN CAPITAL LETTER LAS */
+	0x10AB, 0x2D0B, /* GEORGIAN CAPITAL LETTER MAN */
+	0x10AC, 0x2D0C, /* GEORGIAN CAPITAL LETTER NAR */
+	0x10AD, 0x2D0D, /* GEORGIAN CAPITAL LETTER ON */
+	0x10AE, 0x2D0E, /* GEORGIAN CAPITAL LETTER PAR */
+	0x10AF, 0x2D0F, /* GEORGIAN CAPITAL LETTER ZHAR */
+	0x10B0, 0x2D10, /* GEORGIAN CAPITAL LETTER RAE */
+	0x10B1, 0x2D11, /* GEORGIAN CAPITAL LETTER SAN */
+	0x10B2, 0x2D12, /* GEORGIAN CAPITAL LETTER TAR */
+	0x10B3, 0x2D13, /* GEORGIAN CAPITAL LETTER UN */
+	0x10B4, 0x2D14, /* GEORGIAN CAPITAL LETTER PHAR */
+	0x10B5, 0x2D15, /* GEORGIAN CAPITAL LETTER KHAR */
+	0x10B6, 0x2D16, /* GEORGIAN CAPITAL LETTER GHAN */
+	0x10B7, 0x2D17, /* GEORGIAN CAPITAL LETTER QAR */
+	0x10B8, 0x2D18, /* GEORGIAN CAPITAL LETTER SHIN */
+	0x10B9, 0x2D19, /* GEORGIAN CAPITAL LETTER CHIN */
+	0x10BA, 0x2D1A, /* GEORGIAN CAPITAL LETTER CAN */
+	0x10BB, 0x2D1B, /* GEORGIAN CAPITAL LETTER JIL */
+	0x10BC, 0x2D1C, /* GEORGIAN CAPITAL LETTER CIL */
+	0x10BD, 0x2D1D, /* GEORGIAN CAPITAL LETTER CHAR */
+	0x10BE, 0x2D1E, /* GEORGIAN CAPITAL LETTER XAN */
+	0x10BF, 0x2D1F, /* GEORGIAN CAPITAL LETTER JHAN */
+	0x10C0, 0x2D20, /* GEORGIAN CAPITAL LETTER HAE */
+	0x10C1, 0x2D21, /* GEORGIAN CAPITAL LETTER HE */
+	0x10C2, 0x2D22, /* GEORGIAN CAPITAL LETTER HIE */
+	0x10C3, 0x2D23, /* GEORGIAN CAPITAL LETTER WE */
+	0x10C4, 0x2D24, /* GEORGIAN CAPITAL LETTER HAR */
+	0x10C5, 0x2D25, /* GEORGIAN CAPITAL LETTER HOE */
+	0x1E00, 0x1E01, /* LATIN CAPITAL LETTER A WITH RING BELOW */
+	0x1E02, 0x1E03, /* LATIN CAPITAL LETTER B WITH DOT ABOVE */
+	0x1E04, 0x1E05, /* LATIN CAPITAL LETTER B WITH DOT BELOW */
+	0x1E06, 0x1E07, /* LATIN CAPITAL LETTER B WITH LINE BELOW */
+	0x1E08, 0x1E09, /* LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE */
+	0x1E0A, 0x1E0B, /* LATIN CAPITAL LETTER D WITH DOT ABOVE */
+	0x1E0C, 0x1E0D, /* LATIN CAPITAL LETTER D WITH DOT BELOW */
+	0x1E0E, 0x1E0F, /* LATIN CAPITAL LETTER D WITH LINE BELOW */
+	0x1E10, 0x1E11, /* LATIN CAPITAL LETTER D WITH CEDILLA */
+	0x1E12, 0x1E13, /* LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW */
+	0x1E14, 0x1E15, /* LATIN CAPITAL LETTER E WITH MACRON AND GRAVE */
+	0x1E16, 0x1E17, /* LATIN CAPITAL LETTER E WITH MACRON AND ACUTE */
+	0x1E18, 0x1E19, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW */
+	0x1E1A, 0x1E1B, /* LATIN CAPITAL LETTER E WITH TILDE BELOW */
+	0x1E1C, 0x1E1D, /* LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE */
+	0x1E1E, 0x1E1F, /* LATIN CAPITAL LETTER F WITH DOT ABOVE */
+	0x1E20, 0x1E21, /* LATIN CAPITAL LETTER G WITH MACRON */
+	0x1E22, 0x1E23, /* LATIN CAPITAL LETTER H WITH DOT ABOVE */
+	0x1E24, 0x1E25, /* LATIN CAPITAL LETTER H WITH DOT BELOW */
+	0x1E26, 0x1E27, /* LATIN CAPITAL LETTER H WITH DIAERESIS */
+	0x1E28, 0x1E29, /* LATIN CAPITAL LETTER H WITH CEDILLA */
+	0x1E2A, 0x1E2B, /* LATIN CAPITAL LETTER H WITH BREVE BELOW */
+	0x1E2C, 0x1E2D, /* LATIN CAPITAL LETTER I WITH TILDE BELOW */
+	0x1E2E, 0x1E2F, /* LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE */
+	0x1E30, 0x1E31, /* LATIN CAPITAL LETTER K WITH ACUTE */
+	0x1E32, 0x1E33, /* LATIN CAPITAL LETTER K WITH DOT BELOW */
+	0x1E34, 0x1E35, /* LATIN CAPITAL LETTER K WITH LINE BELOW */
+	0x1E36, 0x1E37, /* LATIN CAPITAL LETTER L WITH DOT BELOW */
+	0x1E38, 0x1E39, /* LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON */
+	0x1E3A, 0x1E3B, /* LATIN CAPITAL LETTER L WITH LINE BELOW */
+	0x1E3C, 0x1E3D, /* LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW */
+	0x1E3E, 0x1E3F, /* LATIN CAPITAL LETTER M WITH ACUTE */
+	0x1E40, 0x1E41, /* LATIN CAPITAL LETTER M WITH DOT ABOVE */
+	0x1E42, 0x1E43, /* LATIN CAPITAL LETTER M WITH DOT BELOW */
+	0x1E44, 0x1E45, /* LATIN CAPITAL LETTER N WITH DOT ABOVE */
+	0x1E46, 0x1E47, /* LATIN CAPITAL LETTER N WITH DOT BELOW */
+	0x1E48, 0x1E49, /* LATIN CAPITAL LETTER N WITH LINE BELOW */
+	0x1E4A, 0x1E4B, /* LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW */
+	0x1E4C, 0x1E4D, /* LATIN CAPITAL LETTER O WITH TILDE AND ACUTE */
+	0x1E4E, 0x1E4F, /* LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS */
+	0x1E50, 0x1E51, /* LATIN CAPITAL LETTER O WITH MACRON AND GRAVE */
+	0x1E52, 0x1E53, /* LATIN CAPITAL LETTER O WITH MACRON AND ACUTE */
+	0x1E54, 0x1E55, /* LATIN CAPITAL LETTER P WITH ACUTE */
+	0x1E56, 0x1E57, /* LATIN CAPITAL LETTER P WITH DOT ABOVE */
+	0x1E58, 0x1E59, /* LATIN CAPITAL LETTER R WITH DOT ABOVE */
+	0x1E5A, 0x1E5B, /* LATIN CAPITAL LETTER R WITH DOT BELOW */
+	0x1E5C, 0x1E5D, /* LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON */
+	0x1E5E, 0x1E5F, /* LATIN CAPITAL LETTER R WITH LINE BELOW */
+	0x1E60, 0x1E61, /* LATIN CAPITAL LETTER S WITH DOT ABOVE */
+	0x1E62, 0x1E63, /* LATIN CAPITAL LETTER S WITH DOT BELOW */
+	0x1E64, 0x1E65, /* LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE */
+	0x1E66, 0x1E67, /* LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE */
+	0x1E68, 0x1E69, /* LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE */
+	0x1E6A, 0x1E6B, /* LATIN CAPITAL LETTER T WITH DOT ABOVE */
+	0x1E6C, 0x1E6D, /* LATIN CAPITAL LETTER T WITH DOT BELOW */
+	0x1E6E, 0x1E6F, /* LATIN CAPITAL LETTER T WITH LINE BELOW */
+	0x1E70, 0x1E71, /* LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW */
+	0x1E72, 0x1E73, /* LATIN CAPITAL LETTER U WITH DIAERESIS BELOW */
+	0x1E74, 0x1E75, /* LATIN CAPITAL LETTER U WITH TILDE BELOW */
+	0x1E76, 0x1E77, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW */
+	0x1E78, 0x1E79, /* LATIN CAPITAL LETTER U WITH TILDE AND ACUTE */
+	0x1E7A, 0x1E7B, /* LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS */
+	0x1E7C, 0x1E7D, /* LATIN CAPITAL LETTER V WITH TILDE */
+	0x1E7E, 0x1E7F, /* LATIN CAPITAL LETTER V WITH DOT BELOW */
+	0x1E80, 0x1E81, /* LATIN CAPITAL LETTER W WITH GRAVE */
+	0x1E82, 0x1E83, /* LATIN CAPITAL LETTER W WITH ACUTE */
+	0x1E84, 0x1E85, /* LATIN CAPITAL LETTER W WITH DIAERESIS */
+	0x1E86, 0x1E87, /* LATIN CAPITAL LETTER W WITH DOT ABOVE */
+	0x1E88, 0x1E89, /* LATIN CAPITAL LETTER W WITH DOT BELOW */
+	0x1E8A, 0x1E8B, /* LATIN CAPITAL LETTER X WITH DOT ABOVE */
+	0x1E8C, 0x1E8D, /* LATIN CAPITAL LETTER X WITH DIAERESIS */
+	0x1E8E, 0x1E8F, /* LATIN CAPITAL LETTER Y WITH DOT ABOVE */
+	0x1E90, 0x1E91, /* LATIN CAPITAL LETTER Z WITH CIRCUMFLEX */
+	0x1E92, 0x1E93, /* LATIN CAPITAL LETTER Z WITH DOT BELOW */
+	0x1E94, 0x1E95, /* LATIN CAPITAL LETTER Z WITH LINE BELOW */
+	0x1E9B, 0x1E61, /* LATIN SMALL LETTER LONG S WITH DOT ABOVE */
+	0x1EA0, 0x1EA1, /* LATIN CAPITAL LETTER A WITH DOT BELOW */
+	0x1EA2, 0x1EA3, /* LATIN CAPITAL LETTER A WITH HOOK ABOVE */
+	0x1EA4, 0x1EA5, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE */
+	0x1EA6, 0x1EA7, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE */
+	0x1EA8, 0x1EA9, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE */
+	0x1EAA, 0x1EAB, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE */
+	0x1EAC, 0x1EAD, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW */
+	0x1EAE, 0x1EAF, /* LATIN CAPITAL LETTER A WITH BREVE AND ACUTE */
+	0x1EB0, 0x1EB1, /* LATIN CAPITAL LETTER A WITH BREVE AND GRAVE */
+	0x1EB2, 0x1EB3, /* LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE */
+	0x1EB4, 0x1EB5, /* LATIN CAPITAL LETTER A WITH BREVE AND TILDE */
+	0x1EB6, 0x1EB7, /* LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW */
+	0x1EB8, 0x1EB9, /* LATIN CAPITAL LETTER E WITH DOT BELOW */
+	0x1EBA, 0x1EBB, /* LATIN CAPITAL LETTER E WITH HOOK ABOVE */
+	0x1EBC, 0x1EBD, /* LATIN CAPITAL LETTER E WITH TILDE */
+	0x1EBE, 0x1EBF, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE */
+	0x1EC0, 0x1EC1, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE */
+	0x1EC2, 0x1EC3, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE */
+	0x1EC4, 0x1EC5, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE */
+	0x1EC6, 0x1EC7, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW */
+	0x1EC8, 0x1EC9, /* LATIN CAPITAL LETTER I WITH HOOK ABOVE */
+	0x1ECA, 0x1ECB, /* LATIN CAPITAL LETTER I WITH DOT BELOW */
+	0x1ECC, 0x1ECD, /* LATIN CAPITAL LETTER O WITH DOT BELOW */
+	0x1ECE, 0x1ECF, /* LATIN CAPITAL LETTER O WITH HOOK ABOVE */
+	0x1ED0, 0x1ED1, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE */
+	0x1ED2, 0x1ED3, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE */
+	0x1ED4, 0x1ED5, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE */
+	0x1ED6, 0x1ED7, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE */
+	0x1ED8, 0x1ED9, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW */
+	0x1EDA, 0x1EDB, /* LATIN CAPITAL LETTER O WITH HORN AND ACUTE */
+	0x1EDC, 0x1EDD, /* LATIN CAPITAL LETTER O WITH HORN AND GRAVE */
+	0x1EDE, 0x1EDF, /* LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE */
+	0x1EE0, 0x1EE1, /* LATIN CAPITAL LETTER O WITH HORN AND TILDE */
+	0x1EE2, 0x1EE3, /* LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW */
+	0x1EE4, 0x1EE5, /* LATIN CAPITAL LETTER U WITH DOT BELOW */
+	0x1EE6, 0x1EE7, /* LATIN CAPITAL LETTER U WITH HOOK ABOVE */
+	0x1EE8, 0x1EE9, /* LATIN CAPITAL LETTER U WITH HORN AND ACUTE */
+	0x1EEA, 0x1EEB, /* LATIN CAPITAL LETTER U WITH HORN AND GRAVE */
+	0x1EEC, 0x1EED, /* LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE */
+	0x1EEE, 0x1EEF, /* LATIN CAPITAL LETTER U WITH HORN AND TILDE */
+	0x1EF0, 0x1EF1, /* LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW */
+	0x1EF2, 0x1EF3, /* LATIN CAPITAL LETTER Y WITH GRAVE */
+	0x1EF4, 0x1EF5, /* LATIN CAPITAL LETTER Y WITH DOT BELOW */
+	0x1EF6, 0x1EF7, /* LATIN CAPITAL LETTER Y WITH HOOK ABOVE */
+	0x1EF8, 0x1EF9, /* LATIN CAPITAL LETTER Y WITH TILDE */
+	0x1F08, 0x1F00, /* GREEK CAPITAL LETTER ALPHA WITH PSILI */
+	0x1F09, 0x1F01, /* GREEK CAPITAL LETTER ALPHA WITH DASIA */
+	0x1F0A, 0x1F02, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA */
+	0x1F0B, 0x1F03, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA */
+	0x1F0C, 0x1F04, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA */
+	0x1F0D, 0x1F05, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA */
+	0x1F0E, 0x1F06, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI */
+	0x1F0F, 0x1F07, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI */
+	0x1F18, 0x1F10, /* GREEK CAPITAL LETTER EPSILON WITH PSILI */
+	0x1F19, 0x1F11, /* GREEK CAPITAL LETTER EPSILON WITH DASIA */
+	0x1F1A, 0x1F12, /* GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA */
+	0x1F1B, 0x1F13, /* GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA */
+	0x1F1C, 0x1F14, /* GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA */
+	0x1F1D, 0x1F15, /* GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA */
+	0x1F28, 0x1F20, /* GREEK CAPITAL LETTER ETA WITH PSILI */
+	0x1F29, 0x1F21, /* GREEK CAPITAL LETTER ETA WITH DASIA */
+	0x1F2A, 0x1F22, /* GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA */
+	0x1F2B, 0x1F23, /* GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA */
+	0x1F2C, 0x1F24, /* GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA */
+	0x1F2D, 0x1F25, /* GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA */
+	0x1F2E, 0x1F26, /* GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI */
+	0x1F2F, 0x1F27, /* GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI */
+	0x1F38, 0x1F30, /* GREEK CAPITAL LETTER IOTA WITH PSILI */
+	0x1F39, 0x1F31, /* GREEK CAPITAL LETTER IOTA WITH DASIA */
+	0x1F3A, 0x1F32, /* GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA */
+	0x1F3B, 0x1F33, /* GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA */
+	0x1F3C, 0x1F34, /* GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA */
+	0x1F3D, 0x1F35, /* GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA */
+	0x1F3E, 0x1F36, /* GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI */
+	0x1F3F, 0x1F37, /* GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI */
+	0x1F48, 0x1F40, /* GREEK CAPITAL LETTER OMICRON WITH PSILI */
+	0x1F49, 0x1F41, /* GREEK CAPITAL LETTER OMICRON WITH DASIA */
+	0x1F4A, 0x1F42, /* GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA */
+	0x1F4B, 0x1F43, /* GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA */
+	0x1F4C, 0x1F44, /* GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA */
+	0x1F4D, 0x1F45, /* GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA */
+	0x1F59, 0x1F51, /* GREEK CAPITAL LETTER UPSILON WITH DASIA */
+	0x1F5B, 0x1F53, /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA */
+	0x1F5D, 0x1F55, /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA */
+	0x1F5F, 0x1F57, /* GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI */
+	0x1F68, 0x1F60, /* GREEK CAPITAL LETTER OMEGA WITH PSILI */
+	0x1F69, 0x1F61, /* GREEK CAPITAL LETTER OMEGA WITH DASIA */
+	0x1F6A, 0x1F62, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA */
+	0x1F6B, 0x1F63, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA */
+	0x1F6C, 0x1F64, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA */
+	0x1F6D, 0x1F65, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA */
+	0x1F6E, 0x1F66, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI */
+	0x1F6F, 0x1F67, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI */
+	0x1F88, 0x1F80, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI */
+	0x1F89, 0x1F81, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI */
+	0x1F8A, 0x1F82, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND PROSGEGRAMMENI */
+	0x1F8B, 0x1F83, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND PROSGEGRAMMENI */
+	0x1F8C, 0x1F84, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND PROSGEGRAMMENI */
+	0x1F8D, 0x1F85, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND PROSGEGRAMMENI */
+	0x1F8E, 0x1F86, /* GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI */
+	0x1F8F, 0x1F87, /* GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI */
+	0x1F98, 0x1F90, /* GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI */
+	0x1F99, 0x1F91, /* GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI */
+	0x1F9A, 0x1F92, /* GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND PROSGEGRAMMENI */
+	0x1F9B, 0x1F93, /* GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND PROSGEGRAMMENI */
+	0x1F9C, 0x1F94, /* GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND PROSGEGRAMMENI */
+	0x1F9D, 0x1F95, /* GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND PROSGEGRAMMENI */
+	0x1F9E, 0x1F96, /* GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI */
+	0x1F9F, 0x1F97, /* GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI */
+	0x1FA8, 0x1FA0, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI */
+	0x1FA9, 0x1FA1, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI */
+	0x1FAA, 0x1FA2, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND PROSGEGRAMMENI */
+	0x1FAB, 0x1FA3, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND PROSGEGRAMMENI */
+	0x1FAC, 0x1FA4, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND PROSGEGRAMMENI */
+	0x1FAD, 0x1FA5, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND PROSGEGRAMMENI */
+	0x1FAE, 0x1FA6, /* GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND PROSGEGRAMMENI */
+	0x1FAF, 0x1FA7, /* GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND PROSGEGRAMMENI */
+	0x1FB8, 0x1FB0, /* GREEK CAPITAL LETTER ALPHA WITH VRACHY */
+	0x1FB9, 0x1FB1, /* GREEK CAPITAL LETTER ALPHA WITH MACRON */
+	0x1FBA, 0x1F70, /* GREEK CAPITAL LETTER ALPHA WITH VARIA */
+	0x1FBB, 0x1F71, /* GREEK CAPITAL LETTER ALPHA WITH OXIA */
+	0x1FBC, 0x1FB3, /* GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI */
+	0x1FBE, 0x03B9, /* GREEK PROSGEGRAMMENI */
+	0x1FC8, 0x1F72, /* GREEK CAPITAL LETTER EPSILON WITH VARIA */
+	0x1FC9, 0x1F73, /* GREEK CAPITAL LETTER EPSILON WITH OXIA */
+	0x1FCA, 0x1F74, /* GREEK CAPITAL LETTER ETA WITH VARIA */
+	0x1FCB, 0x1F75, /* GREEK CAPITAL LETTER ETA WITH OXIA */
+	0x1FCC, 0x1FC3, /* GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI */
+	0x1FD8, 0x1FD0, /* GREEK CAPITAL LETTER IOTA WITH VRACHY */
+	0x1FD9, 0x1FD1, /* GREEK CAPITAL LETTER IOTA WITH MACRON */
+	0x1FDA, 0x1F76, /* GREEK CAPITAL LETTER IOTA WITH VARIA */
+	0x1FDB, 0x1F77, /* GREEK CAPITAL LETTER IOTA WITH OXIA */
+	0x1FE8, 0x1FE0, /* GREEK CAPITAL LETTER UPSILON WITH VRACHY */
+	0x1FE9, 0x1FE1, /* GREEK CAPITAL LETTER UPSILON WITH MACRON */
+	0x1FEA, 0x1F7A, /* GREEK CAPITAL LETTER UPSILON WITH VARIA */
+	0x1FEB, 0x1F7B, /* GREEK CAPITAL LETTER UPSILON WITH OXIA */
+	0x1FEC, 0x1FE5, /* GREEK CAPITAL LETTER RHO WITH DASIA */
+	0x1FF8, 0x1F78, /* GREEK CAPITAL LETTER OMICRON WITH VARIA */
+	0x1FF9, 0x1F79, /* GREEK CAPITAL LETTER OMICRON WITH OXIA */
+	0x1FFA, 0x1F7C, /* GREEK CAPITAL LETTER OMEGA WITH VARIA */
+	0x1FFB, 0x1F7D, /* GREEK CAPITAL LETTER OMEGA WITH OXIA */
+	0x1FFC, 0x1FF3, /* GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI */
+	0x2126, 0x03C9, /* OHM SIGN */
+	0x212A, 0x006B, /* KELVIN SIGN */
+	0x212B, 0x00E5, /* ANGSTROM SIGN */
+	0x2132, 0x214E, /* TURNED CAPITAL F */
+	0x2160, 0x2170, /* ROMAN NUMERAL ONE */
+	0x2161, 0x2171, /* ROMAN NUMERAL TWO */
+	0x2162, 0x2172, /* ROMAN NUMERAL THREE */
+	0x2163, 0x2173, /* ROMAN NUMERAL FOUR */
+	0x2164, 0x2174, /* ROMAN NUMERAL FIVE */
+	0x2165, 0x2175, /* ROMAN NUMERAL SIX */
+	0x2166, 0x2176, /* ROMAN NUMERAL SEVEN */
+	0x2167, 0x2177, /* ROMAN NUMERAL EIGHT */
+	0x2168, 0x2178, /* ROMAN NUMERAL NINE */
+	0x2169, 0x2179, /* ROMAN NUMERAL TEN */
+	0x216A, 0x217A, /* ROMAN NUMERAL ELEVEN */
+	0x216B, 0x217B, /* ROMAN NUMERAL TWELVE */
+	0x216C, 0x217C, /* ROMAN NUMERAL FIFTY */
+	0x216D, 0x217D, /* ROMAN NUMERAL ONE HUNDRED */
+	0x216E, 0x217E, /* ROMAN NUMERAL FIVE HUNDRED */
+	0x216F, 0x217F, /* ROMAN NUMERAL ONE THOUSAND */
+	0x2183, 0x2184, /* ROMAN NUMERAL REVERSED ONE HUNDRED */
+	0x24B6, 0x24D0, /* CIRCLED LATIN CAPITAL LETTER A */
+	0x24B7, 0x24D1, /* CIRCLED LATIN CAPITAL LETTER B */
+	0x24B8, 0x24D2, /* CIRCLED LATIN CAPITAL LETTER C */
+	0x24B9, 0x24D3, /* CIRCLED LATIN CAPITAL LETTER D */
+	0x24BA, 0x24D4, /* CIRCLED LATIN CAPITAL LETTER E */
+	0x24BB, 0x24D5, /* CIRCLED LATIN CAPITAL LETTER F */
+	0x24BC, 0x24D6, /* CIRCLED LATIN CAPITAL LETTER G */
+	0x24BD, 0x24D7, /* CIRCLED LATIN CAPITAL LETTER H */
+	0x24BE, 0x24D8, /* CIRCLED LATIN CAPITAL LETTER I */
+	0x24BF, 0x24D9, /* CIRCLED LATIN CAPITAL LETTER J */
+	0x24C0, 0x24DA, /* CIRCLED LATIN CAPITAL LETTER K */
+	0x24C1, 0x24DB, /* CIRCLED LATIN CAPITAL LETTER L */
+	0x24C2, 0x24DC, /* CIRCLED LATIN CAPITAL LETTER M */
+	0x24C3, 0x24DD, /* CIRCLED LATIN CAPITAL LETTER N */
+	0x24C4, 0x24DE, /* CIRCLED LATIN CAPITAL LETTER O */
+	0x24C5, 0x24DF, /* CIRCLED LATIN CAPITAL LETTER P */
+	0x24C6, 0x24E0, /* CIRCLED LATIN CAPITAL LETTER Q */
+	0x24C7, 0x24E1, /* CIRCLED LATIN CAPITAL LETTER R */
+	0x24C8, 0x24E2, /* CIRCLED LATIN CAPITAL LETTER S */
+	0x24C9, 0x24E3, /* CIRCLED LATIN CAPITAL LETTER T */
+	0x24CA, 0x24E4, /* CIRCLED LATIN CAPITAL LETTER U */
+	0x24CB, 0x24E5, /* CIRCLED LATIN CAPITAL LETTER V */
+	0x24CC, 0x24E6, /* CIRCLED LATIN CAPITAL LETTER W */
+	0x24CD, 0x24E7, /* CIRCLED LATIN CAPITAL LETTER X */
+	0x24CE, 0x24E8, /* CIRCLED LATIN CAPITAL LETTER Y */
+	0x24CF, 0x24E9, /* CIRCLED LATIN CAPITAL LETTER Z */
+	0x2C00, 0x2C30, /* GLAGOLITIC CAPITAL LETTER AZU */
+	0x2C01, 0x2C31, /* GLAGOLITIC CAPITAL LETTER BUKY */
+	0x2C02, 0x2C32, /* GLAGOLITIC CAPITAL LETTER VEDE */
+	0x2C03, 0x2C33, /* GLAGOLITIC CAPITAL LETTER GLAGOLI */
+	0x2C04, 0x2C34, /* GLAGOLITIC CAPITAL LETTER DOBRO */
+	0x2C05, 0x2C35, /* GLAGOLITIC CAPITAL LETTER YESTU */
+	0x2C06, 0x2C36, /* GLAGOLITIC CAPITAL LETTER ZHIVETE */
+	0x2C07, 0x2C37, /* GLAGOLITIC CAPITAL LETTER DZELO */
+	0x2C08, 0x2C38, /* GLAGOLITIC CAPITAL LETTER ZEMLJA */
+	0x2C09, 0x2C39, /* GLAGOLITIC CAPITAL LETTER IZHE */
+	0x2C0A, 0x2C3A, /* GLAGOLITIC CAPITAL LETTER INITIAL IZHE */
+	0x2C0B, 0x2C3B, /* GLAGOLITIC CAPITAL LETTER I */
+	0x2C0C, 0x2C3C, /* GLAGOLITIC CAPITAL LETTER DJERVI */
+	0x2C0D, 0x2C3D, /* GLAGOLITIC CAPITAL LETTER KAKO */
+	0x2C0E, 0x2C3E, /* GLAGOLITIC CAPITAL LETTER LJUDIJE */
+	0x2C0F, 0x2C3F, /* GLAGOLITIC CAPITAL LETTER MYSLITE */
+	0x2C10, 0x2C40, /* GLAGOLITIC CAPITAL LETTER NASHI */
+	0x2C11, 0x2C41, /* GLAGOLITIC CAPITAL LETTER ONU */
+	0x2C12, 0x2C42, /* GLAGOLITIC CAPITAL LETTER POKOJI */
+	0x2C13, 0x2C43, /* GLAGOLITIC CAPITAL LETTER RITSI */
+	0x2C14, 0x2C44, /* GLAGOLITIC CAPITAL LETTER SLOVO */
+	0x2C15, 0x2C45, /* GLAGOLITIC CAPITAL LETTER TVRIDO */
+	0x2C16, 0x2C46, /* GLAGOLITIC CAPITAL LETTER UKU */
+	0x2C17, 0x2C47, /* GLAGOLITIC CAPITAL LETTER FRITU */
+	0x2C18, 0x2C48, /* GLAGOLITIC CAPITAL LETTER HERU */
+	0x2C19, 0x2C49, /* GLAGOLITIC CAPITAL LETTER OTU */
+	0x2C1A, 0x2C4A, /* GLAGOLITIC CAPITAL LETTER PE */
+	0x2C1B, 0x2C4B, /* GLAGOLITIC CAPITAL LETTER SHTA */
+	0x2C1C, 0x2C4C, /* GLAGOLITIC CAPITAL LETTER TSI */
+	0x2C1D, 0x2C4D, /* GLAGOLITIC CAPITAL LETTER CHRIVI */
+	0x2C1E, 0x2C4E, /* GLAGOLITIC CAPITAL LETTER SHA */
+	0x2C1F, 0x2C4F, /* GLAGOLITIC CAPITAL LETTER YERU */
+	0x2C20, 0x2C50, /* GLAGOLITIC CAPITAL LETTER YERI */
+	0x2C21, 0x2C51, /* GLAGOLITIC CAPITAL LETTER YATI */
+	0x2C22, 0x2C52, /* GLAGOLITIC CAPITAL LETTER SPIDERY HA */
+	0x2C23, 0x2C53, /* GLAGOLITIC CAPITAL LETTER YU */
+	0x2C24, 0x2C54, /* GLAGOLITIC CAPITAL LETTER SMALL YUS */
+	0x2C25, 0x2C55, /* GLAGOLITIC CAPITAL LETTER SMALL YUS WITH TAIL */
+	0x2C26, 0x2C56, /* GLAGOLITIC CAPITAL LETTER YO */
+	0x2C27, 0x2C57, /* GLAGOLITIC CAPITAL LETTER IOTATED SMALL YUS */
+	0x2C28, 0x2C58, /* GLAGOLITIC CAPITAL LETTER BIG YUS */
+	0x2C29, 0x2C59, /* GLAGOLITIC CAPITAL LETTER IOTATED BIG YUS */
+	0x2C2A, 0x2C5A, /* GLAGOLITIC CAPITAL LETTER FITA */
+	0x2C2B, 0x2C5B, /* GLAGOLITIC CAPITAL LETTER IZHITSA */
+	0x2C2C, 0x2C5C, /* GLAGOLITIC CAPITAL LETTER SHTAPIC */
+	0x2C2D, 0x2C5D, /* GLAGOLITIC CAPITAL LETTER TROKUTASTI A */
+	0x2C2E, 0x2C5E, /* GLAGOLITIC CAPITAL LETTER LATINATE MYSLITE */
+	0x2C60, 0x2C61, /* LATIN CAPITAL LETTER L WITH DOUBLE BAR */
+	0x2C62, 0x026B, /* LATIN CAPITAL LETTER L WITH MIDDLE TILDE */
+	0x2C63, 0x1D7D, /* LATIN CAPITAL LETTER P WITH STROKE */
+	0x2C64, 0x027D, /* LATIN CAPITAL LETTER R WITH TAIL */
+	0x2C67, 0x2C68, /* LATIN CAPITAL LETTER H WITH DESCENDER */
+	0x2C69, 0x2C6A, /* LATIN CAPITAL LETTER K WITH DESCENDER */
+	0x2C6B, 0x2C6C, /* LATIN CAPITAL LETTER Z WITH DESCENDER */
+	0x2C75, 0x2C76, /* LATIN CAPITAL LETTER HALF H */
+	0x2C80, 0x2C81, /* COPTIC CAPITAL LETTER ALFA */
+	0x2C82, 0x2C83, /* COPTIC CAPITAL LETTER VIDA */
+	0x2C84, 0x2C85, /* COPTIC CAPITAL LETTER GAMMA */
+	0x2C86, 0x2C87, /* COPTIC CAPITAL LETTER DALDA */
+	0x2C88, 0x2C89, /* COPTIC CAPITAL LETTER EIE */
+	0x2C8A, 0x2C8B, /* COPTIC CAPITAL LETTER SOU */
+	0x2C8C, 0x2C8D, /* COPTIC CAPITAL LETTER ZATA */
+	0x2C8E, 0x2C8F, /* COPTIC CAPITAL LETTER HATE */
+	0x2C90, 0x2C91, /* COPTIC CAPITAL LETTER THETHE */
+	0x2C92, 0x2C93, /* COPTIC CAPITAL LETTER IAUDA */
+	0x2C94, 0x2C95, /* COPTIC CAPITAL LETTER KAPA */
+	0x2C96, 0x2C97, /* COPTIC CAPITAL LETTER LAULA */
+	0x2C98, 0x2C99, /* COPTIC CAPITAL LETTER MI */
+	0x2C9A, 0x2C9B, /* COPTIC CAPITAL LETTER NI */
+	0x2C9C, 0x2C9D, /* COPTIC CAPITAL LETTER KSI */
+	0x2C9E, 0x2C9F, /* COPTIC CAPITAL LETTER O */
+	0x2CA0, 0x2CA1, /* COPTIC CAPITAL LETTER PI */
+	0x2CA2, 0x2CA3, /* COPTIC CAPITAL LETTER RO */
+	0x2CA4, 0x2CA5, /* COPTIC CAPITAL LETTER SIMA */
+	0x2CA6, 0x2CA7, /* COPTIC CAPITAL LETTER TAU */
+	0x2CA8, 0x2CA9, /* COPTIC CAPITAL LETTER UA */
+	0x2CAA, 0x2CAB, /* COPTIC CAPITAL LETTER FI */
+	0x2CAC, 0x2CAD, /* COPTIC CAPITAL LETTER KHI */
+	0x2CAE, 0x2CAF, /* COPTIC CAPITAL LETTER PSI */
+	0x2CB0, 0x2CB1, /* COPTIC CAPITAL LETTER OOU */
+	0x2CB2, 0x2CB3, /* COPTIC CAPITAL LETTER DIALECT-P ALEF */
+	0x2CB4, 0x2CB5, /* COPTIC CAPITAL LETTER OLD COPTIC AIN */
+	0x2CB6, 0x2CB7, /* COPTIC CAPITAL LETTER CRYPTOGRAMMIC EIE */
+	0x2CB8, 0x2CB9, /* COPTIC CAPITAL LETTER DIALECT-P KAPA */
+	0x2CBA, 0x2CBB, /* COPTIC CAPITAL LETTER DIALECT-P NI */
+	0x2CBC, 0x2CBD, /* COPTIC CAPITAL LETTER CRYPTOGRAMMIC NI */
+	0x2CBE, 0x2CBF, /* COPTIC CAPITAL LETTER OLD COPTIC OOU */
+	0x2CC0, 0x2CC1, /* COPTIC CAPITAL LETTER SAMPI */
+	0x2CC2, 0x2CC3, /* COPTIC CAPITAL LETTER CROSSED SHEI */
+	0x2CC4, 0x2CC5, /* COPTIC CAPITAL LETTER OLD COPTIC SHEI */
+	0x2CC6, 0x2CC7, /* COPTIC CAPITAL LETTER OLD COPTIC ESH */
+	0x2CC8, 0x2CC9, /* COPTIC CAPITAL LETTER AKHMIMIC KHEI */
+	0x2CCA, 0x2CCB, /* COPTIC CAPITAL LETTER DIALECT-P HORI */
+	0x2CCC, 0x2CCD, /* COPTIC CAPITAL LETTER OLD COPTIC HORI */
+	0x2CCE, 0x2CCF, /* COPTIC CAPITAL LETTER OLD COPTIC HA */
+	0x2CD0, 0x2CD1, /* COPTIC CAPITAL LETTER L-SHAPED HA */
+	0x2CD2, 0x2CD3, /* COPTIC CAPITAL LETTER OLD COPTIC HEI */
+	0x2CD4, 0x2CD5, /* COPTIC CAPITAL LETTER OLD COPTIC HAT */
+	0x2CD6, 0x2CD7, /* COPTIC CAPITAL LETTER OLD COPTIC GANGIA */
+	0x2CD8, 0x2CD9, /* COPTIC CAPITAL LETTER OLD COPTIC DJA */
+	0x2CDA, 0x2CDB, /* COPTIC CAPITAL LETTER OLD COPTIC SHIMA */
+	0x2CDC, 0x2CDD, /* COPTIC CAPITAL LETTER OLD NUBIAN SHIMA */
+	0x2CDE, 0x2CDF, /* COPTIC CAPITAL LETTER OLD NUBIAN NGI */
+	0x2CE0, 0x2CE1, /* COPTIC CAPITAL LETTER OLD NUBIAN NYI */
+	0x2CE2, 0x2CE3, /* COPTIC CAPITAL LETTER OLD NUBIAN WAU */
+	0xFF21, 0xFF41, /* FULLWIDTH LATIN CAPITAL LETTER A */
+	0xFF22, 0xFF42, /* FULLWIDTH LATIN CAPITAL LETTER B */
+	0xFF23, 0xFF43, /* FULLWIDTH LATIN CAPITAL LETTER C */
+	0xFF24, 0xFF44, /* FULLWIDTH LATIN CAPITAL LETTER D */
+	0xFF25, 0xFF45, /* FULLWIDTH LATIN CAPITAL LETTER E */
+	0xFF26, 0xFF46, /* FULLWIDTH LATIN CAPITAL LETTER F */
+	0xFF27, 0xFF47, /* FULLWIDTH LATIN CAPITAL LETTER G */
+	0xFF28, 0xFF48, /* FULLWIDTH LATIN CAPITAL LETTER H */
+	0xFF29, 0xFF49, /* FULLWIDTH LATIN CAPITAL LETTER I */
+	0xFF2A, 0xFF4A, /* FULLWIDTH LATIN CAPITAL LETTER J */
+	0xFF2B, 0xFF4B, /* FULLWIDTH LATIN CAPITAL LETTER K */
+	0xFF2C, 0xFF4C, /* FULLWIDTH LATIN CAPITAL LETTER L */
+	0xFF2D, 0xFF4D, /* FULLWIDTH LATIN CAPITAL LETTER M */
+	0xFF2E, 0xFF4E, /* FULLWIDTH LATIN CAPITAL LETTER N */
+	0xFF2F, 0xFF4F, /* FULLWIDTH LATIN CAPITAL LETTER O */
+	0xFF30, 0xFF50, /* FULLWIDTH LATIN CAPITAL LETTER P */
+	0xFF31, 0xFF51, /* FULLWIDTH LATIN CAPITAL LETTER Q */
+	0xFF32, 0xFF52, /* FULLWIDTH LATIN CAPITAL LETTER R */
+	0xFF33, 0xFF53, /* FULLWIDTH LATIN CAPITAL LETTER S */
+	0xFF34, 0xFF54, /* FULLWIDTH LATIN CAPITAL LETTER T */
+	0xFF35, 0xFF55, /* FULLWIDTH LATIN CAPITAL LETTER U */
+	0xFF36, 0xFF56, /* FULLWIDTH LATIN CAPITAL LETTER V */
+	0xFF37, 0xFF57, /* FULLWIDTH LATIN CAPITAL LETTER W */
+	0xFF38, 0xFF58, /* FULLWIDTH LATIN CAPITAL LETTER X */
+	0xFF39, 0xFF59, /* FULLWIDTH LATIN CAPITAL LETTER Y */
+	0xFF3A, 0xFF5A, /* FULLWIDTH LATIN CAPITAL LETTER Z */
+};
+
 /*
  * DOS filenames are made of 2 parts, the name part and the extension part.
  * The name part is 8 characters long and the extension part is 3
@@ -513,24 +1439,34 @@ unix2dosfn(const u_char *un, u_char dn[1
  *	 i.e. doesn't consist solely of blanks and dots
  */
 int
-unix2winfn(const u_char *un, int unlen, struct winentry *wep, int cnt, int chksum)
+unix2winfn(const u_char *un, int unlen, struct winentry *wep, int cnt, int chksum, int utf8)
 {
-	const u_int8_t *cp;
-	u_int8_t *wcp;
-	int i;
+	u_int16_t wn[WIN_MAXLEN], *p;
+	int i, len;
+	const u_char *cp;
 
 	/*
 	 * Drop trailing blanks and dots
 	 */
-	for (cp = un + unlen; *--cp == ' ' || *cp == '.'; unlen--);
+	for (cp = un + unlen; unlen > 0; unlen--)
+		if (*--cp != ' ' && *cp != '.')
+			break;
 
-	un += (cnt - 1) * WIN_CHARS;
-	unlen -= (cnt - 1) * WIN_CHARS;
+	/*
+	 * Offset of this entry
+	 */
+	i = (cnt - 1) * WIN_CHARS;
+
+	/*
+	 * Translate UNIX name to UCS-2
+	 */
+	len = utf8 ? utf8ucs2str(un, unlen, wn, WIN_MAXLEN) : char8ucs2str(un, unlen, wn, WIN_MAXLEN);
+	ucs2pad(wn, len, WIN_MAXLEN);
 
 	/*
 	 * Initialize winentry to some useful default
 	 */
-	for (wcp = (u_int8_t *)wep, i = sizeof(*wep); --i >= 0; *wcp++ = 0xff);
+	memset(wep, 0xff, sizeof(*wep));
 	wep->weCnt = cnt;
 	wep->weAttributes = ATTR_WIN95;
 	wep->weReserved1 = 0;
@@ -538,33 +1474,18 @@ unix2winfn(const u_char *un, int unlen, 
 	wep->weReserved2 = 0;
 
 	/*
-	 * Now convert the filename parts
+	 * Store name segment into directory entry
 	 */
-	for (wcp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
-		if (--unlen < 0)
-			goto done;
-		*wcp++ = *un++;
-		*wcp++ = 0;
-	}
-	for (wcp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
-		if (--unlen < 0)
-			goto done;
-		*wcp++ = *un++;
-		*wcp++ = 0;
-	}
-	for (wcp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
-		if (--unlen < 0)
-			goto done;
-		*wcp++ = *un++;
-		*wcp++ = 0;
-	}
-	if (!unlen)
-		wep->weCnt |= WIN_LAST;
-	return unlen;
-
-done:
-	*wcp++ = 0;
-	*wcp++ = 0;
+	p = &wn[i];
+	memcpy(wep->wePart1, p, sizeof(wep->wePart1));
+	p += sizeof(wep->wePart1) / sizeof(*p);
+	memcpy(wep->wePart2, p, sizeof(wep->wePart2));
+	p += sizeof(wep->wePart2) / sizeof(*p);
+	memcpy(wep->wePart3, p, sizeof(wep->wePart3));
+	
+	if (len > i + WIN_CHARS)
+		return 1;
+
 	wep->weCnt |= WIN_LAST;
 	return 0;
 }
@@ -574,15 +1495,16 @@ done:
  * Returns the checksum or -1 if no match
  */
 int
-winChkName(const u_char *un, int unlen, struct winentry *wep, int chksum)
+winChkName(const u_char *un, int unlen, struct winentry *wep, int chksum, int utf8)
 {
-	u_int8_t *cp;
-	int i;
+	u_int16_t wn[WIN_MAXLEN], *p;
+	u_int16_t buf[WIN_CHARS];
+	int i, len;
 
 	/*
 	 * First compare checksums
 	 */
-	if (wep->weCnt&WIN_LAST)
+	if (wep->weCnt & WIN_LAST)
 		chksum = wep->weChksum;
 	else if (chksum != wep->weChksum)
 		chksum = -1;
@@ -592,56 +1514,33 @@ winChkName(const u_char *un, int unlen, 
 	/*
 	 * Offset of this entry
 	 */
-	i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
-	un += i;
-	if ((unlen -= i) < 0)
-		return -1;
+	i = ((wep->weCnt & WIN_CNT) - 1) * WIN_CHARS;
 
 	/*
-	 * Ignore redundant winentries (those with only \0\0 on start in them).
-	 * An appearance of such entry is a bug; unknown if in NetBSD msdosfs
-	 * or MS Windows.
-	 */
-	if (unlen == 0) {
-		if (wep->wePart1[0] == '\0' && wep->wePart1[1] == '\0')
-			return chksum;
-		else
-			return -1;
-	}
+	 * Translate UNIX name to ucs-2
+	 */
+	len = utf8 ? utf8ucs2str(un, unlen, wn, WIN_MAXLEN) : char8ucs2str(un, unlen, wn, WIN_MAXLEN);
+	ucs2pad(wn, len, WIN_MAXLEN);
 
-	if ((wep->weCnt&WIN_LAST) && unlen > WIN_CHARS)
+	if (i >= len + 1)
 		return -1;
 
 	/*
-	 * Compare the name parts
+	 * Fetch name segment from directory entry
 	 */
-	for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
-		if (--unlen < 0) {
-			if (!*cp++ && !*cp)
-				return chksum;
-			return -1;
-		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
-			return -1;
-	}
-	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
-		if (--unlen < 0) {
-			if (!*cp++ && !*cp)
-				return chksum;
-			return -1;
-		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
-			return -1;
-	}
-	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
-		if (--unlen < 0) {
-			if (!*cp++ && !*cp)
-				return chksum;
-			return -1;
-		}
-		if (u2l[*cp++] != u2l[*un++] || *cp++)
-			return -1;
-	}
+	p = &buf[0];
+	memcpy(p, wep->wePart1, sizeof(wep->wePart1));
+	p += sizeof(wep->wePart1) / sizeof(*p);
+	memcpy(p, wep->wePart2, sizeof(wep->wePart2));
+	p += sizeof(wep->wePart2) / sizeof(*p);
+	memcpy(p, wep->wePart3, sizeof(wep->wePart3));
+
+	/*
+	 * And compare name segment
+	 */
+	if (! (utf8 ? ucs2match(&wn[i], buf, WIN_CHARS) : char8match(&wn[i], buf, WIN_CHARS)))
+		return -1;
+
 	return chksum;
 }
 
@@ -650,110 +1549,65 @@ winChkName(const u_char *un, int unlen, 
  * Returns the checksum or -1 if impossible
  */
 int
-win2unixfn(struct winentry *wep, struct dirent *dp, int chksum)
+win2unixfn(struct winentry *wep, struct dirent *dp, int chksum, int utf8)
 {
-	u_int8_t *cp;
-	u_int8_t *np, *ep = (u_int8_t *)dp->d_name + WIN_MAXLEN;
-	int i;
+	u_int16_t wn[WIN_CHARS], *p;
+	u_int8_t buf[WIN_CHARS*3];
+	int len;
 
-	if ((wep->weCnt&WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
-	    || !(wep->weCnt&WIN_CNT))
+	if ((wep->weCnt & WIN_CNT) > howmany(WIN_MAXLEN, WIN_CHARS)
+	    || !(wep->weCnt & WIN_CNT))
 		return -1;
 
 	/*
 	 * First compare checksums
 	 */
-	if (wep->weCnt&WIN_LAST) {
+	if (wep->weCnt & WIN_LAST) {
 		chksum = wep->weChksum;
-		/*
-		 * This works even though d_namlen is one byte!
-		 */
-#ifdef __NetBSD__
-		dp->d_namlen = (wep->weCnt&WIN_CNT) * WIN_CHARS;
-#endif
+		dp->d_namlen = 0;
 	} else if (chksum != wep->weChksum)
 		chksum = -1;
 	if (chksum == -1)
 		return -1;
 
 	/*
-	 * Offset of this entry
+	 * Fetch name segment from directory entry
 	 */
-	i = ((wep->weCnt&WIN_CNT) - 1) * WIN_CHARS;
-	np = (u_int8_t *)dp->d_name + i;
+	p = &wn[0];
+	memcpy(p, wep->wePart1, sizeof(wep->wePart1));
+	p += sizeof(wep->wePart1) / sizeof(*p);
+	memcpy(p, wep->wePart2, sizeof(wep->wePart2));
+	p += sizeof(wep->wePart2) / sizeof(*p);
+	memcpy(p, wep->wePart3, sizeof(wep->wePart3));
 
 	/*
-	 * Convert the name parts
+	 * Don't allow slashes in UNIX names. Discard that entry.
 	 */
-	for (cp = wep->wePart1, i = sizeof(wep->wePart1)/2; --i >= 0;) {
-		switch (*np++ = *cp++) {
-		case 0:
-#ifdef __NetBSD__
-			dp->d_namlen -= sizeof(wep->wePart2)/2
-			    + sizeof(wep->wePart3)/2 + i + 1;
-#endif
-			return chksum;
-		case '/':
-			np[-1] = 0;
-			return -1;
-		}
-		/*
-		 * The size comparison should result in the compiler
-		 * optimizing the whole if away
-		 */
-		if (WIN_MAXLEN % WIN_CHARS < sizeof(wep->wePart1) / 2
-		    && np > ep) {
-			np[-1] = 0;
-			return -1;
-		}
-		if (*cp++)
-			return -1;
-	}
-	for (cp = wep->wePart2, i = sizeof(wep->wePart2)/2; --i >= 0;) {
-		switch (*np++ = *cp++) {
-		case 0:
-#ifdef __NetBSD__
-			dp->d_namlen -= sizeof(wep->wePart3)/2 + i + 1;
-#endif
-			return chksum;
-		case '/':
-			np[-1] = 0;
-			return -1;
-		}
-		/*
-		 * The size comparisons should be optimized away
-		 */
-		if (WIN_MAXLEN % WIN_CHARS >= sizeof(wep->wePart1) / 2
-		    && WIN_MAXLEN % WIN_CHARS < (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
-		    && np > ep) {
-			np[-1] = 0;
-			return -1;
-		}
-		if (*cp++)
-			return -1;
-	}
-	for (cp = wep->wePart3, i = sizeof(wep->wePart3)/2; --i >= 0;) {
-		switch (*np++ = *cp++) {
-		case 0:
-#ifdef __NetBSD__
-			dp->d_namlen -= i + 1;
-#endif
-			return chksum;
-		case '/':
-			np[-1] = 0;
-			return -1;
-		}
-		/*
-		 * See above
-		 */
-		if (WIN_MAXLEN % WIN_CHARS >= (sizeof(wep->wePart1) + sizeof(wep->wePart2)) / 2
-		    && np > ep) {
-			np[-1] = 0;
-			return -1;
-		}
-		if (*cp++)
-			return -1;
-	}
+	if (invalidname(wn, WIN_CHARS))
+		return -1;
+
+	/*
+	 * Translate ucs-2 to UNIX name
+	 */
+	len = utf8 ? ucs2utf8str(wn, WIN_CHARS, buf, sizeof(buf)) : ucs2char8str(wn, WIN_CHARS, buf, sizeof(buf));
+
+	/*
+	 * Prepend name segment to directory entry
+	 *
+	 * This ignores the slot number from the windows entry but
+	 * assumes that segments are read in reverse order.
+	 *
+	 * The UCS-2 name (up to 255 chars) can overflow the UNIX
+	 * directory entry (up to 511 bytes). Trailing characters
+	 * are silently discarded. This could also end in multiple
+	 * files using the same (truncated) name.
+	 */
+	dp->d_namlen += len;
+	if (dp->d_namlen > sizeof(dp->d_name)-1)
+		dp->d_namlen = sizeof(dp->d_name)-1;
+	memmove(&dp->d_name[len], &dp->d_name[0], dp->d_namlen - len);
+	memcpy(dp->d_name, buf, len);
+
 	return chksum;
 }
 
@@ -767,7 +1621,7 @@ winChksum(u_int8_t *name)
 	u_int8_t s;
 
 	for (s = 0, i = 11; --i >= 0; s += *name++)
-		s = (s << 7)|(s >> 1);
+		s = (s << 7) | (s >> 1);
 	return s;
 }
 
@@ -775,12 +1629,302 @@ winChksum(u_int8_t *name)
  * Determine the number of slots necessary for Win95 names
  */
 int
-winSlotCnt(const u_char *un, int unlen)
+winSlotCnt(const u_char *un, int unlen, int utf8)
 {
-	for (un += unlen; unlen > 0; unlen--)
-		if (*--un != ' ' && *un != '.')
+	const u_char *cp;
+	int len;
+
+	/*
+	 * Drop trailing blanks and dots
+	 */
+	for (cp = un + unlen; unlen > 0; unlen--)
+		if (*--cp != ' ' && *cp != '.')
 			break;
-	if (unlen > WIN_MAXLEN)
-		return 0;
-	return howmany(unlen, WIN_CHARS);
+
+	len = utf8 ? utf8ucs2str(un, unlen, NULL, WIN_MAXLEN) : unlen;
+
+	return howmany(len, WIN_CHARS);
+}
+
+/*
+ * Scan windows name for characters that must not
+ * appear in a UNIX filename
+ */
+static int
+invalidname(const u_int16_t *in, int n)
+{
+	while (n-- > 0) {
+		if (*in++ == '/')
+			return 1;
+	}
+
+	return 0;
+}
+
+/*
+ * Convert UCS-2 character into UTF-8
+ * return number of output bytes or 0 if output
+ * buffer is too short
+ */
+static int
+ucs2utf8(const u_int16_t *in, u_int8_t *out, int n)
+{
+	uint16_t inch = le16toh(in[0]);
+
+	if (inch <= 0x007f) {
+		if (n < 1) return 0;
+		if (out)
+			*out++ = inch;
+		return 1;
+	} else if (inch <= 0x07ff) {
+		if (n < 2) return 0;
+		if (out) {
+			*out++ = 0xc0 | (inch >> 6);
+			*out++ = 0x80 | (inch & 0x3f);
+		}
+		return 2;
+	} else {
+		if (n < 3) return 0;
+		if (out) {
+			*out++ = 0xe0 | (inch >> 12);
+			*out++ = 0x80 | ((inch >> 6) & 0x3f);
+			*out++ = 0x80 | (inch & 0x3f);
+		}
+		return 3;
+	}
 }
+
+
+/*
+ * Convert UTF-8 bytes into UCS-2 character
+ * return number of input bytes, 0 if input
+ * is too short and -1 if input is invalid
+ */
+static int
+utf8ucs2(const u_int8_t *in, int n, u_int16_t *out)
+{
+	uint16_t outch;
+
+	if (n < 1) return 0;
+
+	if (in[0] <= 0x7f) {
+		outch = in[0];
+		if (out)
+			*out = htole16(outch);
+		return 1;
+	} else if (in[0] <= 0xdf) {
+		if (n < 2) return 0;
+		outch = (in[0] & 0x1f) << 6 | (in[1] & 0x3f);
+		if (out)
+			*out = htole16(outch);
+		return 2;
+	} else if (in[0] <= 0xef) {
+		if (n < 3) return 0;
+		outch = (in[0] & 0x1f) << 12 | (in[1] & 0x3f) << 6 | (in[2] & 0x3f);
+		if (out)
+			*out = htole16(outch);
+		return 3;
+	}
+
+	return -1;
+}
+
+/*
+ * Convert UCS-2 string into UTF-8 string
+ * return total number of output bytes
+ */
+static int
+ucs2utf8str(const u_int16_t *in, int n, u_int8_t *out, int m)
+{
+	u_int8_t *p;
+	int outlen;
+
+	p = out;
+	while (n > 0 && *in != 0) {
+		outlen = ucs2utf8(in, out ? p : out, m);
+		if (outlen == 0)
+			break;
+		p += outlen;
+		m -= outlen;
+		in += 1;
+		n -= 1;
+	}
+
+	return p - out;
+}
+
+/*
+ * Convert UTF-8 string into UCS-2 string
+ * return total number of output characters
+ */
+static int
+utf8ucs2str(const u_int8_t *in, int n, u_int16_t *out, int m)
+{
+	u_int16_t *p;
+	int inlen;
+
+	p = out;
+	while (n > 0 && *in != 0) {
+		if (m < 1)
+			break;
+		inlen = utf8ucs2(in, n, out ? p : out);
+		if (inlen <= 0)
+			break;
+		in += inlen;
+		n -= inlen;
+		p += 1;
+		m -= 1;
+	}
+
+	return p - out;
+}
+
+/*
+ * Convert UCS-2 string into 8bit character string
+ * return total number of output bytes
+ */
+static int
+ucs2char8str(const u_int16_t *in, int n, u_int8_t *out, int m)
+{
+	u_int8_t *p;
+	u_int16_t inch;
+
+	p = out;
+	while (n > 0 && in[0] != 0) {
+		if (m < 1)
+			break;
+		inch = le16toh(in[0]);
+		if (inch > 255)
+			break;
+		if (p)
+			p[0] = inch;
+		p += 1;
+		m -= 1;
+		in += 1;
+		n -= 1;
+	}
+
+	return p - out;
+}
+
+/*
+ * Convert 8bit character string into UCS-2 string
+ * return total number of output characters
+ */
+static int
+char8ucs2str(const u_int8_t *in, int n, u_int16_t *out, int m)
+{
+	u_int16_t *p;
+
+	p = out;
+	while (n > 0 && in[0] != 0) {
+		if (m < 1)
+			break;
+		if (p)
+			p[0] = htole16(in[0]);
+		p += 1;
+		m -= 1;
+		in += 1;
+		n -= 1;
+	}
+
+	return p - out;
+}
+
+static void
+ucs2pad(u_int16_t *buf, int len, int size)
+{
+
+	if (len < size-1)
+		buf[len++] = 0x0000;
+	while (len < size)
+		buf[len++] = 0xffff;
+}
+
+/*
+ * Fold UCS-2 character to uppercase
+ */
+static u_int16_t
+ucs2fold(u_int16_t w)
+{
+	int low,high,mid;
+	u_int16_t check;
+
+	w = le16toh(w);
+
+	low = 0;
+	high = __arraycount(foldmap) / 2;
+	while (low < high) {
+		mid = (low + high)/2;
+		check = foldmap[2*mid+0];
+
+		if (w == check) {
+			w = foldmap[2*mid+1];
+			break;
+		}
+
+		if (w < check)
+			high = mid;
+		else
+			low = mid+1;
+	}
+
+	w = le16toh(w);
+
+	return w;
+}
+
+/*
+ * Compare two UCS-2 strings case-insensitively
+ *
+ * uses the Unicode case folding table
+ */
+static int
+ucs2match(u_int16_t *w1, u_int16_t *w2, int n)
+{
+	u_int16_t u1, u2;
+
+	while (n > 0) {
+		if (*w1 == 0 || *w2 == 0)
+			return *w1 == *w2;
+		u1 = ucs2fold(*w1);
+		u2 = ucs2fold(*w2);
+		if (u1 != u2)
+			return 0;
+		++w1;
+		++w2;
+		--n;
+	}
+
+	return 1;
+}
+
+/*
+ * Compare two 8bit char conversions case-insensitively
+ *
+ * uses the DOS case folding table
+ */
+static int
+char8match(u_int16_t *w1, u_int16_t *w2, int n)
+{
+	u_int16_t u1, u2;
+
+	while (n > 0) {
+		u1 = le16toh(*w1);
+		u2 = le16toh(*w2);
+		if (u1 == 0 || u2 == 0)
+			return u1 == u2;
+		if (u1 > 255 || u2 > 255)
+			return 0;
+		u1 = u2l[u1 & 0xff];
+		u2 = u2l[u2 & 0xff];
+		if (u1 != u2)
+			return 0;
+		++w1;
+		++w2;
+		--n;
+	}
+
+	return 1;
+}
+

Index: src/sys/fs/msdosfs/msdosfs_lookup.c
diff -u src/sys/fs/msdosfs/msdosfs_lookup.c:1.34 src/sys/fs/msdosfs/msdosfs_lookup.c:1.35
--- src/sys/fs/msdosfs/msdosfs_lookup.c:1.34	Sat Mar 28 19:24:05 2015
+++ src/sys/fs/msdosfs/msdosfs_lookup.c	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: msdosfs_lookup.c,v 1.34 2015/03/28 19:24:05 maxv Exp $	*/
+/*	$NetBSD: msdosfs_lookup.c,v 1.35 2016/01/30 09:59:27 mlelstv Exp $	*/
 
 /*-
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
@@ -52,7 +52,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: msdosfs_lookup.c,v 1.34 2015/03/28 19:24:05 maxv Exp $");
+__KERNEL_RCSID(0, "$NetBSD: msdosfs_lookup.c,v 1.35 2016/01/30 09:59:27 mlelstv Exp $");
 
 #include <sys/param.h>
 
@@ -187,12 +187,12 @@ msdosfs_lookup(void *v)
 		break;
 	case 2:
 		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
-		    cnp->cn_namelen) + 1;
+		    cnp->cn_namelen, pmp->pm_flags & MSDOSFSMNT_UTF8) + 1;
 		break;
 	case 3:
 		olddos = 0;
 		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
-		    cnp->cn_namelen) + 1;
+		    cnp->cn_namelen, pmp->pm_flags & MSDOSFSMNT_UTF8) + 1;
 		break;
 	}
 	if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
@@ -282,7 +282,8 @@ msdosfs_lookup(void *v)
 					chksum = winChkName((const u_char *)cnp->cn_nameptr,
 							    cnp->cn_namelen,
 							    (struct winentry *)dep,
-							    chksum);
+							    chksum,
+							    pmp->pm_flags & MSDOSFSMNT_UTF8);
 					continue;
 				}
 
@@ -652,7 +653,8 @@ createde(struct denode *dep, struct deno
 				fndoffset -= sizeof(struct direntry);
 			}
 			if (!unix2winfn(un, unlen, (struct winentry *)ndep,
-						wcnt, chksum))
+					wcnt, chksum,
+					ddep->de_pmp->pm_flags & MSDOSFSMNT_UTF8))
 				break;
 		}
 	}

Index: src/sys/fs/msdosfs/msdosfs_vnops.c
diff -u src/sys/fs/msdosfs/msdosfs_vnops.c:1.93 src/sys/fs/msdosfs/msdosfs_vnops.c:1.94
--- src/sys/fs/msdosfs/msdosfs_vnops.c:1.93	Sat Apr  4 12:34:44 2015
+++ src/sys/fs/msdosfs/msdosfs_vnops.c	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: msdosfs_vnops.c,v 1.93 2015/04/04 12:34:44 riastradh Exp $	*/
+/*	$NetBSD: msdosfs_vnops.c,v 1.94 2016/01/30 09:59:27 mlelstv Exp $	*/
 
 /*-
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
@@ -48,7 +48,7 @@
  */
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.93 2015/04/04 12:34:44 riastradh Exp $");
+__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.94 2016/01/30 09:59:27 mlelstv Exp $");
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -1541,7 +1541,7 @@ msdosfs_readdir(void *v)
 				if (pmp->pm_flags & MSDOSFSMNT_SHORTNAME)
 					continue;
 				chksum = win2unixfn((struct winentry *)dentp,
-				    dirbuf, chksum);
+				    dirbuf, chksum, pmp->pm_flags & MSDOSFSMNT_UTF8);
 				continue;
 			}
 

Index: src/sys/fs/msdosfs/msdosfsmount.h
diff -u src/sys/fs/msdosfs/msdosfsmount.h:1.20 src/sys/fs/msdosfs/msdosfsmount.h:1.21
--- src/sys/fs/msdosfs/msdosfsmount.h:1.20	Tue Jul  8 09:21:52 2014
+++ src/sys/fs/msdosfs/msdosfsmount.h	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: msdosfsmount.h,v 1.20 2014/07/08 09:21:52 hannken Exp $	*/
+/*	$NetBSD: msdosfsmount.h,v 1.21 2016/01/30 09:59:27 mlelstv Exp $	*/
 
 /*-
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
@@ -78,11 +78,12 @@ struct msdosfs_args {
 #define	MSDOSFSMNT_NOWIN95	4	/* Completely ignore Win95 entries */
 #define	MSDOSFSMNT_GEMDOSFS	8	/* This is a GEMDOS-flavour */
 #define MSDOSFSMNT_VERSIONED	16	/* Struct is versioned */
+#define MSDOSFSMNT_UTF8		32	/* Use UTF8 filenames */
 
 /* All flags above: */
 #define	MSDOSFSMNT_MNTOPT \
 	(MSDOSFSMNT_SHORTNAME|MSDOSFSMNT_LONGNAME|MSDOSFSMNT_NOWIN95 \
-	 |MSDOSFSMNT_GEMDOSFS|MSDOSFSMNT_VERSIONED)
+	 |MSDOSFSMNT_GEMDOSFS|MSDOSFSMNT_VERSIONED|MSDOSFSMNT_UTF8)
 
 #define	MSDOSFSMNT_RONLY	0x80000000	/* mounted read-only	*/
 #define	MSDOSFSMNT_WAITONFAT	0x40000000	/* mounted synchronous	*/
@@ -90,7 +91,7 @@ struct msdosfs_args {
 
 #define MSDOSFSMNT_BITS "\177\20" \
     "b\00shortname\0b\01longname\0b\02nowin95\0b\03gemdosfs\0b\04mntversioned\0" \
-    "b\037ronly\0b\036waitonfat\0b\035fatmirror\0"
+    "b\05utf8\0b\037ronly\0b\036waitonfat\0b\035fatmirror\0"
 
 #ifdef _KERNEL
 #include <sys/mallocvar.h>

Index: src/usr.sbin/makefs/msdos.c
diff -u src/usr.sbin/makefs/msdos.c:1.15 src/usr.sbin/makefs/msdos.c:1.16
--- src/usr.sbin/makefs/msdos.c:1.15	Fri Oct 16 16:40:02 2015
+++ src/usr.sbin/makefs/msdos.c	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: msdos.c,v 1.15 2015/10/16 16:40:02 christos Exp $	*/
+/*	$NetBSD: msdos.c,v 1.16 2016/01/30 09:59:27 mlelstv Exp $	*/
 
 /*-
  * Copyright (c) 2013 The NetBSD Foundation, Inc.
@@ -34,7 +34,7 @@
 
 #include <sys/cdefs.h>
 #if defined(__RCSID) && !defined(__lint)
-__RCSID("$NetBSD: msdos.c,v 1.15 2015/10/16 16:40:02 christos Exp $");
+__RCSID("$NetBSD: msdos.c,v 1.16 2016/01/30 09:59:27 mlelstv Exp $");
 #endif	/* !__lint */
 
 #include <sys/param.h>
@@ -55,7 +55,9 @@ __RCSID("$NetBSD: msdos.c,v 1.15 2015/10
 #include <util.h>
 
 #include <ffs/buf.h>
+#include <fs/msdosfs/bpb.h>
 #include <fs/msdosfs/denode.h>
+#include <fs/msdosfs/msdosfsmount.h>
 #include "makefs.h"
 #include "msdos.h"
 #include "mkfs_msdos.h"
@@ -63,10 +65,15 @@ __RCSID("$NetBSD: msdos.c,v 1.15 2015/10
 static int msdos_populate_dir(const char *, struct denode *, fsnode *,
     fsnode *, fsinfo_t *);
 
+struct msdos_options_ex {
+	struct msdos_options options;
+	bool utf8;
+};
+
 void
 msdos_prep_opts(fsinfo_t *fsopts)
 {
-	struct msdos_options *msdos_opt = ecalloc(1, sizeof(*msdos_opt));
+	struct msdos_options_ex *msdos_opt = ecalloc(1, sizeof(*msdos_opt));
 	const option_t msdos_options[] = {
 #define AOPT(_opt, _type, _name, _min, _desc) { 			\
 	.letter = _opt,							\
@@ -76,7 +83,7 @@ msdos_prep_opts(fsinfo_t *fsopts)
 	    (sizeof(_type) == 1 ? OPT_INT8 :				\
 	    (sizeof(_type) == 2 ? OPT_INT16 :				\
 	    (sizeof(_type) == 4 ? OPT_INT32 : OPT_INT64)))),		\
-	.value = &msdos_opt->_name,					\
+	.value = &msdos_opt->options._name,				\
 	.minimum = _min,						\
 	.maximum = sizeof(_type) == 1 ? 0xff :				\
 	    (sizeof(_type) == 2 ? 0xffff :				\
@@ -85,6 +92,8 @@ msdos_prep_opts(fsinfo_t *fsopts)
 },
 ALLOPTS
 #undef AOPT	
+		{ 'U', "utf8", &msdos_opt->utf8, OPT_BOOL,
+		  0, 1, "Use UTF8 names" },
 		{ .name = NULL }
 	};
 
@@ -131,10 +140,11 @@ msdos_parse_opts(const char *option, fsi
 void
 msdos_makefs(const char *image, const char *dir, fsnode *root, fsinfo_t *fsopts)
 {
-	struct msdos_options *msdos_opt = fsopts->fs_specific;
+	struct msdos_options_ex *msdos_opt = fsopts->fs_specific;
 	struct vnode vp, rootvp;
 	struct timeval	start;
 	struct msdosfsmount *pmp;
+	uint32_t flags;
 
 	assert(image != NULL);
 	assert(dir != NULL);
@@ -145,31 +155,36 @@ msdos_makefs(const char *image, const ch
 	 * XXX: pick up other options from the msdos specific ones?
 	 * Is minsize right here?
 	 */
-	msdos_opt->create_size = MAX(msdos_opt->create_size, fsopts->minsize);
-	msdos_opt->offset = fsopts->offset;
-	if (msdos_opt->bytes_per_sector == 0) {
+	msdos_opt->options.create_size = MAX(msdos_opt->options.create_size,
+		fsopts->minsize);
+	msdos_opt->options.offset = fsopts->offset;
+	if (msdos_opt->options.bytes_per_sector == 0) {
 		if (fsopts->sectorsize == -1)
 			fsopts->sectorsize = 512;
-		msdos_opt->bytes_per_sector = fsopts->sectorsize;
+		msdos_opt->options.bytes_per_sector = fsopts->sectorsize;
 	} else if (fsopts->sectorsize == -1) {
-		fsopts->sectorsize = msdos_opt->bytes_per_sector;
-	} else if (fsopts->sectorsize != msdos_opt->bytes_per_sector) {
+		fsopts->sectorsize = msdos_opt->options.bytes_per_sector;
+	} else if (fsopts->sectorsize != msdos_opt->options.bytes_per_sector) {
 		err(1, "inconsistent sectorsize -S %u"
 		    "!= -o bytes_per_sector %u", 
-		    fsopts->sectorsize, msdos_opt->bytes_per_sector);
+		    fsopts->sectorsize, msdos_opt->options.bytes_per_sector);
 	}
 
 		/* create image */
 	printf("Creating `%s'\n", image);
 	TIMER_START(start);
-	if (mkfs_msdos(image, NULL, msdos_opt) == -1)
+	if (mkfs_msdos(image, NULL, &msdos_opt->options) == -1)
 		return;
 	TIMER_RESULTS(start, "mkfs_msdos");
 
 	fsopts->fd = open(image, O_RDWR);
 	vp.fs = fsopts;
 
-	if ((pmp = msdosfs_mount(&vp, 0)) == NULL)
+	flags = 0;
+	if (msdos_opt->utf8)
+		flags |= MSDOSFSMNT_UTF8;
+
+	if ((pmp = msdosfs_mount(&vp, flags)) == NULL)
 		err(1, "msdosfs_mount");
 
 	if (msdosfs_root(pmp, &rootvp) != 0)

Index: src/usr.sbin/makefs/msdos/msdosfs_vfsops.c
diff -u src/usr.sbin/makefs/msdos/msdosfs_vfsops.c:1.9 src/usr.sbin/makefs/msdos/msdosfs_vfsops.c:1.10
--- src/usr.sbin/makefs/msdos/msdosfs_vfsops.c:1.9	Sun Mar 29 05:52:59 2015
+++ src/usr.sbin/makefs/msdos/msdosfs_vfsops.c	Sat Jan 30 09:59:27 2016
@@ -50,7 +50,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.9 2015/03/29 05:52:59 agc Exp $");
+__KERNEL_RCSID(0, "$NetBSD: msdosfs_vfsops.c,v 1.10 2016/01/30 09:59:27 mlelstv Exp $");
 
 #include <sys/param.h>
 
@@ -152,6 +152,12 @@ msdosfs_mount(struct vnode *devvp, int f
 		}
 	}
 
+	pmp->pm_flags = flags & MSDOSFSMNT_MNTOPT;
+	if (pmp->pm_flags & MSDOSFSMNT_GEMDOSFS)
+		pmp->pm_flags |= MSDOSFSMNT_NOWIN95;
+	if (pmp->pm_flags & MSDOSFSMNT_NOWIN95)
+		pmp->pm_flags |= MSDOSFSMNT_SHORTNAME;
+
 	if (pmp->pm_Sectors == 0) {
 		pmp->pm_HiddenSects = getulong(b50->bpbHiddenSecs);
 		pmp->pm_HugeSectors = getulong(b50->bpbHugeSectors);

Index: src/usr.sbin/makefs/msdos/msdosfs_vnops.c
diff -u src/usr.sbin/makefs/msdos/msdosfs_vnops.c:1.16 src/usr.sbin/makefs/msdos/msdosfs_vnops.c:1.17
--- src/usr.sbin/makefs/msdos/msdosfs_vnops.c:1.16	Sun Mar 29 05:52:59 2015
+++ src/usr.sbin/makefs/msdos/msdosfs_vnops.c	Sat Jan 30 09:59:27 2016
@@ -1,4 +1,4 @@
-/*	$NetBSD: msdosfs_vnops.c,v 1.16 2015/03/29 05:52:59 agc Exp $ */
+/*	$NetBSD: msdosfs_vnops.c,v 1.17 2016/01/30 09:59:27 mlelstv Exp $ */
 
 /*-
  * Copyright (C) 1994, 1995, 1997 Wolfgang Solfrank.
@@ -51,7 +51,7 @@
 #endif
 
 #include <sys/cdefs.h>
-__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.16 2015/03/29 05:52:59 agc Exp $");
+__KERNEL_RCSID(0, "$NetBSD: msdosfs_vnops.c,v 1.17 2016/01/30 09:59:27 mlelstv Exp $");
 
 #include <sys/param.h>
 #include <sys/mman.h>
@@ -154,12 +154,12 @@ msdosfs_findslot(struct denode *dp, stru
 		break;
 	case 2:
 		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
-		    cnp->cn_namelen) + 1;
+		    cnp->cn_namelen, pmp->pm_flags & MSDOSFSMNT_UTF8) + 1;
 		break;
 	case 3:
 		olddos = 0;
 		wincnt = winSlotCnt((const u_char *)cnp->cn_nameptr,
-		    cnp->cn_namelen) + 1;
+		    cnp->cn_namelen, pmp->pm_flags & MSDOSFSMNT_UTF8) + 1;
 		break;
 	}
 
@@ -243,7 +243,8 @@ msdosfs_findslot(struct denode *dp, stru
 					chksum = winChkName((const u_char *)cnp->cn_nameptr,
 							    cnp->cn_namelen,
 							    (struct winentry *)dep,
-							    chksum);
+							    chksum,
+							    pmp->pm_flags & MSDOSFSMNT_UTF8);
 					continue;
 				}
 

Reply via email to