Author: Ronan Lamy <[email protected]>
Branch: 
Changeset: r86664:6b0c10d6e524
Date: 2016-08-28 22:49 +0100
http://bitbucket.org/pypy/pypy/changeset/6b0c10d6e524/

Log:    Merged rpython-resync into default

        Backport rpython changes from py3k and py3.5.

diff too long, truncating to 2000 out of 208811 lines

diff --git a/pypy/module/_sre/__init__.py b/pypy/module/_sre/__init__.py
--- a/pypy/module/_sre/__init__.py
+++ b/pypy/module/_sre/__init__.py
@@ -1,4 +1,4 @@
-from pypy.interpreter.mixedmodule import MixedModule 
+from pypy.interpreter.mixedmodule import MixedModule
 
 class Module(MixedModule):
 
@@ -7,7 +7,7 @@
 
     interpleveldefs = {
         'CODESIZE':       'space.wrap(interp_sre.CODESIZE)',
-        'MAGIC':          'space.wrap(interp_sre.MAGIC)',
+        'MAGIC':          'space.newint(20031017)',
         'MAXREPEAT':      'space.wrap(interp_sre.MAXREPEAT)',
         'compile':        'interp_sre.W_SRE_Pattern',
         'getlower':       'interp_sre.w_getlower',
diff --git a/pypy/module/_sre/interp_sre.py b/pypy/module/_sre/interp_sre.py
--- a/pypy/module/_sre/interp_sre.py
+++ b/pypy/module/_sre/interp_sre.py
@@ -14,7 +14,7 @@
 # Constants and exposed functions
 
 from rpython.rlib.rsre import rsre_core
-from rpython.rlib.rsre.rsre_char import MAGIC, CODESIZE, MAXREPEAT, getlower, 
set_unicode_db
+from rpython.rlib.rsre.rsre_char import CODESIZE, MAXREPEAT, getlower, 
set_unicode_db
 
 
 @unwrap_spec(char_ord=int, flags=int)
diff --git a/pypy/module/signal/interp_signal.py 
b/pypy/module/signal/interp_signal.py
--- a/pypy/module/signal/interp_signal.py
+++ b/pypy/module/signal/interp_signal.py
@@ -64,7 +64,7 @@
         AsyncAction.__init__(self, space)
         self.pending_signal = -1
         self.fire_in_another_thread = False
-        #
+
         @rgc.no_collect
         def _after_thread_switch():
             if self.fire_in_another_thread:
@@ -251,7 +251,7 @@
         except OSError as e:
             if e.errno == errno.EBADF:
                 raise oefmt(space.w_ValueError, "invalid fd")
-    old_fd = pypysig_set_wakeup_fd(fd)
+    old_fd = pypysig_set_wakeup_fd(fd, True)
     return space.wrap(intmask(old_fd))
 
 
diff --git a/rpython/rlib/_rsocket_rffi.py b/rpython/rlib/_rsocket_rffi.py
--- a/rpython/rlib/_rsocket_rffi.py
+++ b/rpython/rlib/_rsocket_rffi.py
@@ -176,6 +176,7 @@
 
 
 SOCK_DGRAM SOCK_RAW SOCK_RDM SOCK_SEQPACKET SOCK_STREAM
+SOCK_CLOEXEC
 
 SOL_SOCKET SOL_IPX SOL_AX25 SOL_ATALK SOL_NETROM SOL_ROSE
 
@@ -319,6 +320,8 @@
                                           [('p_proto', rffi.INT),
                                            ])
 
+CConfig.HAVE_ACCEPT4 = platform.Has('accept4')
+
 if _POSIX:
     CConfig.nfds_t = platform.SimpleType('nfds_t')
     CConfig.pollfd = platform.Struct('struct pollfd',
@@ -541,6 +544,12 @@
 socketaccept = external('accept', [socketfd_type, sockaddr_ptr,
                                    socklen_t_ptr], socketfd_type,
                         save_err=SAVE_ERR)
+HAVE_ACCEPT4 = cConfig.HAVE_ACCEPT4
+if HAVE_ACCEPT4:
+    socketaccept4 = external('accept4', [socketfd_type, sockaddr_ptr,
+                                         socklen_t_ptr, rffi.INT],
+                                        socketfd_type,
+                             save_err=SAVE_ERR)
 socketbind = external('bind', [socketfd_type, sockaddr_ptr, socklen_t],
                               rffi.INT, save_err=SAVE_ERR)
 socketlisten = external('listen', [socketfd_type, rffi.INT], rffi.INT,
diff --git a/rpython/rlib/buffer.py b/rpython/rlib/buffer.py
--- a/rpython/rlib/buffer.py
+++ b/rpython/rlib/buffer.py
@@ -10,6 +10,7 @@
     _immutable_ = True
 
     def getlength(self):
+        """Returns the size in bytes (even if getitemsize() > 1)."""
         raise NotImplementedError
 
     def __len__(self):
diff --git a/rpython/rlib/rbigint.py b/rpython/rlib/rbigint.py
--- a/rpython/rlib/rbigint.py
+++ b/rpython/rlib/rbigint.py
@@ -1222,6 +1222,9 @@
         # base is supposed to be positive or 0.0, which means we use e
         if base == 10.0:
             return _loghelper(math.log10, self)
+        if base == 2.0:
+            from rpython.rlib import rfloat
+            return _loghelper(rfloat.log2, self)
         ret = _loghelper(math.log, self)
         if base != 0.0:
             ret /= math.log(base)
diff --git a/rpython/rlib/ropenssl.py b/rpython/rlib/ropenssl.py
--- a/rpython/rlib/ropenssl.py
+++ b/rpython/rlib/ropenssl.py
@@ -126,6 +126,10 @@
         "SSL_OP_NO_COMPRESSION")
     SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS = rffi_platform.ConstantInteger(
         "SSL_OP_DONT_INSERT_EMPTY_FRAGMENTS")
+    SSL_OP_CIPHER_SERVER_PREFERENCE = rffi_platform.ConstantInteger(
+        "SSL_OP_CIPHER_SERVER_PREFERENCE")
+    SSL_OP_SINGLE_DH_USE = rffi_platform.ConstantInteger(
+        "SSL_OP_SINGLE_DH_USE")
     HAS_SNI = rffi_platform.Defined("SSL_CTRL_SET_TLSEXT_HOSTNAME")
     HAS_NPN = rffi_platform.Defined("OPENSSL_NPN_NEGOTIATED")
     SSL_VERIFY_NONE = rffi_platform.ConstantInteger("SSL_VERIFY_NONE")
@@ -307,6 +311,8 @@
 
 if HAVE_OPENSSL_RAND:
     ssl_external('RAND_add', [rffi.CCHARP, rffi.INT, rffi.DOUBLE], lltype.Void)
+    ssl_external('RAND_bytes', [rffi.UCHARP, rffi.INT], rffi.INT)
+    ssl_external('RAND_pseudo_bytes', [rffi.UCHARP, rffi.INT], rffi.INT)
     ssl_external('RAND_status', [], rffi.INT)
     if HAVE_OPENSSL_RAND_EGD:
         ssl_external('RAND_egd', [rffi.CCHARP], rffi.INT)
@@ -465,6 +471,7 @@
 ssl_external('GENERAL_NAME_print', [BIO, GENERAL_NAME], rffi.INT)
 ssl_external('pypy_GENERAL_NAME_dirn', [GENERAL_NAME], X509_NAME,
              macro=True)
+
 ssl_external('pypy_GENERAL_NAME_uri', [GENERAL_NAME], ASN1_IA5STRING,
              macro=True)
 
diff --git a/rpython/rlib/rposix.py b/rpython/rlib/rposix.py
--- a/rpython/rlib/rposix.py
+++ b/rpython/rlib/rposix.py
@@ -223,7 +223,7 @@
             pass
 
 if _WIN32:
-    includes = ['io.h', 'sys/utime.h', 'sys/types.h', 'process.h']
+    includes = ['io.h', 'sys/utime.h', 'sys/types.h', 'process.h', 'time.h']
     libraries = []
 else:
     if sys.platform.startswith(('darwin', 'netbsd', 'openbsd')):
@@ -254,10 +254,11 @@
     UTIMBUF = rffi_platform.Struct('struct %sutimbuf' % UNDERSCORE_ON_WIN32,
                                    [('actime', rffi.INT),
                                     ('modtime', rffi.INT)])
+    CLOCK_T = rffi_platform.SimpleType('clock_t', rffi.INT)
     if not _WIN32:
         UID_T = rffi_platform.SimpleType('uid_t', rffi.UINT)
         GID_T = rffi_platform.SimpleType('gid_t', rffi.UINT)
-        CLOCK_T = rffi_platform.SimpleType('clock_t', rffi.INT)
+        TIOCGWINSZ = rffi_platform.DefinedConstantInteger('TIOCGWINSZ')
 
         TMS = rffi_platform.Struct(
             'struct tms', [('tms_utime', rffi.INT),
@@ -265,6 +266,12 @@
                            ('tms_cutime', rffi.INT),
                            ('tms_cstime', rffi.INT)])
 
+        WINSIZE = rffi_platform.Struct(
+            'struct winsize', [('ws_row', rffi.USHORT),
+                           ('ws_col', rffi.USHORT),
+                           ('ws_xpixel', rffi.USHORT),
+                           ('ws_ypixel', rffi.USHORT)])
+
     GETPGRP_HAVE_ARG = rffi_platform.Has("getpgrp(0)")
     SETPGRP_HAVE_ARG = rffi_platform.Has("setpgrp(0, 0)")
 
@@ -365,15 +372,27 @@
         raise OSError(get_saved_errno(), '%s failed' % name)
     return result
 
+def _dup(fd, inheritable=True):
+    validate_fd(fd)
+    if inheritable:
+        res = c_dup(fd)
+    else:
+        res = c_dup_noninheritable(fd)
+    return res
+
 @replace_os_function('dup')
-def dup(fd):
-    validate_fd(fd)
-    return handle_posix_error('dup', c_dup(fd))
+def dup(fd, inheritable=True):
+    res = _dup(fd, inheritable)
+    return handle_posix_error('dup', res)
 
 @replace_os_function('dup2')
-def dup2(fd, newfd):
+def dup2(fd, newfd, inheritable=True):
     validate_fd(fd)
-    handle_posix_error('dup2', c_dup2(fd, newfd))
+    if inheritable:
+        res = c_dup2(fd, newfd)
+    else:
+        res = c_dup2_noninheritable(fd, newfd)
+    handle_posix_error('dup2', res)
 
 #___________________________________________________________________
 
@@ -628,6 +647,8 @@
                          macro=True, save_err=rffi.RFFI_FULL_ERRNO_ZERO)
     c_closedir = external('closedir', [DIRP], rffi.INT, releasegil=False)
     c_dirfd = external('dirfd', [DIRP], rffi.INT, releasegil=False)
+    c_ioctl_voidp = external('ioctl', [rffi.INT, rffi.UINT, rffi.VOIDP], 
rffi.INT,
+                         save_err=rffi.RFFI_SAVE_ERRNO)
 else:
     dirent_config = {}
 
@@ -1113,37 +1134,77 @@
     c_open_osfhandle = external('_open_osfhandle', [rffi.INTPTR_T,
                                                     rffi.INT],
                                 rffi.INT)
+    HAVE_PIPE2 = False
+    HAVE_DUP3 = False
+    O_CLOEXEC = None
 else:
     INT_ARRAY_P = rffi.CArrayPtr(rffi.INT)
     c_pipe = external('pipe', [INT_ARRAY_P], rffi.INT,
                       save_err=rffi.RFFI_SAVE_ERRNO)
+    class CConfig:
+        _compilation_info_ = eci
+        HAVE_PIPE2 = rffi_platform.Has('pipe2')
+        HAVE_DUP3 = rffi_platform.Has('dup3')
+        O_CLOEXEC = rffi_platform.DefinedConstantInteger('O_CLOEXEC')
+    config = rffi_platform.configure(CConfig)
+    HAVE_PIPE2 = config['HAVE_PIPE2']
+    HAVE_DUP3 = config['HAVE_DUP3']
+    O_CLOEXEC = config['O_CLOEXEC']
+    if HAVE_PIPE2:
+        c_pipe2 = external('pipe2', [INT_ARRAY_P, rffi.INT], rffi.INT,
+                          save_err=rffi.RFFI_SAVE_ERRNO)
 
 @replace_os_function('pipe')
-def pipe():
+def pipe(flags=0):
+    # 'flags' might be ignored.  Check the result.
     if _WIN32:
+        # 'flags' ignored
         pread  = lltype.malloc(rwin32.LPHANDLE.TO, 1, flavor='raw')
         pwrite = lltype.malloc(rwin32.LPHANDLE.TO, 1, flavor='raw')
         try:
-            if not CreatePipe(
-                    pread, pwrite, lltype.nullptr(rffi.VOIDP.TO), 0):
-                raise WindowsError(rwin32.GetLastError_saved(),
-                                   "CreatePipe failed")
+            ok = CreatePipe(
+                    pread, pwrite, lltype.nullptr(rffi.VOIDP.TO), 0)
             hread = rffi.cast(rffi.INTPTR_T, pread[0])
             hwrite = rffi.cast(rffi.INTPTR_T, pwrite[0])
         finally:
             lltype.free(pwrite, flavor='raw')
             lltype.free(pread, flavor='raw')
-        fdread = c_open_osfhandle(hread, 0)
-        fdwrite = c_open_osfhandle(hwrite, 1)
+        if ok:
+            fdread = c_open_osfhandle(hread, 0)
+            fdwrite = c_open_osfhandle(hwrite, 1)
+            if fdread == -1 or fdwrite == -1:
+                rwin32.CloseHandle(hread)
+                rwin32.CloseHandle(hwrite)
+                ok = 0
+        if not ok:
+            raise WindowsError(rwin32.GetLastError_saved(),
+                               "CreatePipe failed")
         return (fdread, fdwrite)
     else:
         filedes = lltype.malloc(INT_ARRAY_P.TO, 2, flavor='raw')
         try:
-            handle_posix_error('pipe', c_pipe(filedes))
+            if HAVE_PIPE2 and _pipe2_syscall.attempt_syscall():
+                res = c_pipe2(filedes, flags)
+                if _pipe2_syscall.fallback(res):
+                    res = c_pipe(filedes)
+            else:
+                res = c_pipe(filedes)      # 'flags' ignored
+            handle_posix_error('pipe', res)
             return (widen(filedes[0]), widen(filedes[1]))
         finally:
             lltype.free(filedes, flavor='raw')
 
+def pipe2(flags):
+    # Only available if there is really a c_pipe2 function.
+    # No fallback to pipe() if we get ENOSYS.
+    filedes = lltype.malloc(INT_ARRAY_P.TO, 2, flavor='raw')
+    try:
+        res = c_pipe2(filedes, flags)
+        handle_posix_error('pipe2', res)
+        return (widen(filedes[0]), widen(filedes[1]))
+    finally:
+        lltype.free(filedes, flavor='raw')
+
 c_link = external('link', [rffi.CCHARP, rffi.CCHARP], rffi.INT,
                   save_err=rffi.RFFI_SAVE_ERRNO,)
 c_symlink = external('symlink', [rffi.CCHARP, rffi.CCHARP], rffi.INT,
@@ -2079,14 +2140,46 @@
 
 
 eci_inheritable = eci.merge(ExternalCompilationInfo(
-    separate_module_sources=["""
+    separate_module_sources=[r"""
+#include <errno.h>
+
 RPY_EXTERN
 int rpy_set_inheritable(int fd, int inheritable)
 {
-    /* XXX minimal impl. XXX */
-    int request = inheritable ? FIONCLEX : FIOCLEX;
-    return ioctl(fd, request, NULL);
+    static int ioctl_works = -1;
+    int flags;
+
+    if (ioctl_works != 0) {
+        int request = inheritable ? FIONCLEX : FIOCLEX;
+        int err = ioctl(fd, request, NULL);
+        if (!err) {
+            ioctl_works = 1;
+            return 0;
+        }
+
+        if (errno != ENOTTY && errno != EACCES) {
+            return -1;
+        }
+        else {
+            /* ENOTTY: The ioctl is declared but not supported by the
+               kernel.  EACCES: SELinux policy, this can be the case on
+               Android. */
+            ioctl_works = 0;
+        }
+        /* fallback to fcntl() if ioctl() does not work */
+    }
+
+    flags = fcntl(fd, F_GETFD);
+    if (flags < 0)
+        return -1;
+
+    if (inheritable)
+        flags &= ~FD_CLOEXEC;
+    else
+        flags |= FD_CLOEXEC;
+    return fcntl(fd, F_SETFD, flags);
 }
+
 RPY_EXTERN
 int rpy_get_inheritable(int fd)
 {
@@ -2095,8 +2188,64 @@
         return -1;
     return !(flags & FD_CLOEXEC);
 }
-    """],
-    post_include_bits=['RPY_EXTERN int rpy_set_inheritable(int, int);']))
+
+RPY_EXTERN
+int rpy_dup_noninheritable(int fd)
+{
+#ifdef _WIN32
+#error NotImplementedError
+#endif
+
+#ifdef F_DUPFD_CLOEXEC
+    return fcntl(fd, F_DUPFD_CLOEXEC, 0);
+#else
+    fd = dup(fd);
+    if (fd >= 0) {
+        if (rpy_set_inheritable(fd, 0) != 0) {
+            close(fd);
+            return -1;
+        }
+    }
+    return fd;
+#endif
+}
+
+RPY_EXTERN
+int rpy_dup2_noninheritable(int fd, int fd2)
+{
+#ifdef _WIN32
+#error NotImplementedError
+#endif
+
+#ifdef F_DUP2FD_CLOEXEC
+    return fcntl(fd, F_DUP2FD_CLOEXEC, fd2);
+
+#else
+# if %(HAVE_DUP3)d   /* HAVE_DUP3 */
+    static int dup3_works = -1;
+    if (dup3_works != 0) {
+        if (dup3(fd, fd2, O_CLOEXEC) >= 0)
+            return 0;
+        if (dup3_works == -1)
+            dup3_works = (errno != ENOSYS);
+        if (dup3_works)
+            return -1;
+    }
+# endif
+    if (dup2(fd, fd2) < 0)
+        return -1;
+    if (rpy_set_inheritable(fd2, 0) != 0) {
+        close(fd2);
+        return -1;
+    }
+    return 0;
+#endif
+}
+    """ % {'HAVE_DUP3': HAVE_DUP3}],
+    post_include_bits=['RPY_EXTERN int rpy_set_inheritable(int, int);\n'
+                       'RPY_EXTERN int rpy_get_inheritable(int);\n'
+                       'RPY_EXTERN int rpy_dup_noninheritable(int);\n'
+                       'RPY_EXTERN int rpy_dup2_noninheritable(int, int);\n']))
 
 c_set_inheritable = external('rpy_set_inheritable', [rffi.INT, rffi.INT],
                              rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO,
@@ -2104,12 +2253,56 @@
 c_get_inheritable = external('rpy_get_inheritable', [rffi.INT],
                              rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO,
                              compilation_info=eci_inheritable)
+c_dup_noninheritable = external('rpy_dup_noninheritable', [rffi.INT],
+                                rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO,
+                                compilation_info=eci_inheritable)
+c_dup2_noninheritable = external('rpy_dup2_noninheritable', 
[rffi.INT,rffi.INT],
+                                 rffi.INT, save_err=rffi.RFFI_SAVE_ERRNO,
+                                 compilation_info=eci_inheritable)
 
 def set_inheritable(fd, inheritable):
-    error = c_set_inheritable(fd, inheritable)
-    handle_posix_error('set_inheritable', error)
+    result = c_set_inheritable(fd, inheritable)
+    handle_posix_error('set_inheritable', result)
 
 def get_inheritable(fd):
     res = c_get_inheritable(fd)
     res = handle_posix_error('get_inheritable', res)
     return res != 0
+
+class SetNonInheritableCache(object):
+    """Make one prebuilt instance of this for each path that creates
+    file descriptors, where you don't necessarily know if that function
+    returns inheritable or non-inheritable file descriptors.
+    """
+    _immutable_fields_ = ['cached_inheritable?']
+    cached_inheritable = -1    # -1 = don't know yet; 0 = off; 1 = on
+
+    def set_non_inheritable(self, fd):
+        if self.cached_inheritable == -1:
+            self.cached_inheritable = get_inheritable(fd)
+        if self.cached_inheritable == 1:
+            # 'fd' is inheritable; we must manually turn it off
+            set_inheritable(fd, False)
+
+    def _cleanup_(self):
+        self.cached_inheritable = -1
+
+class ENoSysCache(object):
+    """Cache whether a system call returns ENOSYS or not."""
+    _immutable_fields_ = ['cached_nosys?']
+    cached_nosys = -1      # -1 = don't know; 0 = no; 1 = yes, getting ENOSYS
+
+    def attempt_syscall(self):
+        return self.cached_nosys != 1
+
+    def fallback(self, res):
+        nosys = self.cached_nosys
+        if nosys == -1:
+            nosys = (res < 0 and get_saved_errno() == errno.ENOSYS)
+            self.cached_nosys = nosys
+        return nosys
+
+    def _cleanup_(self):
+        self.cached_nosys = -1
+
+_pipe2_syscall = ENoSysCache()
diff --git a/rpython/rlib/rsignal.py b/rpython/rlib/rsignal.py
--- a/rpython/rlib/rsignal.py
+++ b/rpython/rlib/rsignal.py
@@ -31,7 +31,7 @@
     signal_names.append('CTRL_BREAK_EVENT')
     CTRL_C_EVENT = 0
     CTRL_BREAK_EVENT = 1
-includes = ['stdlib.h', 'src/signals.h']
+includes = ['stdlib.h', 'src/signals.h', 'signal.h']
 if sys.platform != 'win32':
     includes.append('sys/time.h')
 
@@ -47,7 +47,9 @@
     _compilation_info_ = eci
 
 if sys.platform != 'win32':
-    for name in """ITIMER_REAL ITIMER_VIRTUAL ITIMER_PROF""".split():
+    for name in """
+        ITIMER_REAL ITIMER_VIRTUAL ITIMER_PROF
+        SIG_BLOCK SIG_UNBLOCK SIG_SETMASK""".split():
         setattr(CConfig, name, rffi_platform.DefinedConstantInteger(name))
 
     CConfig.timeval = rffi_platform.Struct(
@@ -71,7 +73,8 @@
 pypysig_default = external('pypysig_default', [rffi.INT], lltype.Void)
 pypysig_setflag = external('pypysig_setflag', [rffi.INT], lltype.Void)
 pypysig_reinstall = external('pypysig_reinstall', [rffi.INT], lltype.Void)
-pypysig_set_wakeup_fd = external('pypysig_set_wakeup_fd', [rffi.INT], rffi.INT)
+pypysig_set_wakeup_fd = external('pypysig_set_wakeup_fd',
+                                 [rffi.INT, rffi.INT], rffi.INT)
 pypysig_poll = external('pypysig_poll', [], rffi.INT, releasegil=False)
 # don't bother releasing the GIL around a call to pypysig_poll: it's
 # pointless and a performance issue
@@ -98,3 +101,20 @@
                            [rffi.INT, itimervalP, itimervalP], rffi.INT,
                            save_err=rffi.RFFI_SAVE_ERRNO)
     c_getitimer = external('getitimer', [rffi.INT, itimervalP], rffi.INT)
+
+c_pthread_kill = external('pthread_kill', [lltype.Signed, rffi.INT], rffi.INT,
+                          save_err=rffi.RFFI_SAVE_ERRNO)
+
+if sys.platform != 'win32':
+    c_sigset_t = rffi.COpaquePtr('sigset_t', compilation_info=eci)
+    c_sigemptyset = external('sigemptyset', [c_sigset_t], rffi.INT)
+    c_sigaddset = external('sigaddset', [c_sigset_t, rffi.INT], rffi.INT)
+    c_sigismember = external('sigismember', [c_sigset_t, rffi.INT], rffi.INT)
+    c_sigwait = external('sigwait', [c_sigset_t, rffi.INTP], rffi.INT,
+                         releasegil=True,
+                         save_err=rffi.RFFI_SAVE_ERRNO)
+    c_sigpending = external('sigpending', [c_sigset_t], rffi.INT,
+                            save_err=rffi.RFFI_SAVE_ERRNO)
+    c_pthread_sigmask = external('pthread_sigmask',
+                                 [rffi.INT, c_sigset_t, c_sigset_t], rffi.INT,
+                                 save_err=rffi.RFFI_SAVE_ERRNO)
diff --git a/rpython/rlib/rsocket.py b/rpython/rlib/rsocket.py
--- a/rpython/rlib/rsocket.py
+++ b/rpython/rlib/rsocket.py
@@ -8,10 +8,11 @@
 # XXX this does not support yet the least common AF_xxx address families
 # supported by CPython.  See http://bugs.pypy.org/issue1942
 
+from errno import EINVAL
 from rpython.rlib import _rsocket_rffi as _c, jit, rgc
 from rpython.rlib.objectmodel import instantiate, keepalive_until_here
 from rpython.rlib.rarithmetic import intmask, r_uint
-from rpython.rlib import rthread
+from rpython.rlib import rthread, rposix
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rtyper.lltypesystem.rffi import sizeof, offsetof
 from rpython.rtyper.extregistry import ExtRegistryEntry
@@ -522,12 +523,28 @@
     timeout = -1.0
 
     def __init__(self, family=AF_INET, type=SOCK_STREAM, proto=0,
-                 fd=_c.INVALID_SOCKET):
+                 fd=_c.INVALID_SOCKET, inheritable=True):
         """Create a new socket."""
         if _c.invalid_socket(fd):
-            fd = _c.socket(family, type, proto)
-        if _c.invalid_socket(fd):
-            raise self.error_handler()
+            if not inheritable and SOCK_CLOEXEC is not None:
+                # Non-inheritable: we try to call socket() with
+                # SOCK_CLOEXEC, which may fail.  If we get EINVAL,
+                # then we fall back to the SOCK_CLOEXEC-less case.
+                fd = _c.socket(family, type | SOCK_CLOEXEC, proto)
+                if fd < 0:
+                    if _c.geterrno() == EINVAL:
+                        # Linux older than 2.6.27 does not support
+                        # SOCK_CLOEXEC.  An EINVAL might be caused by
+                        # random other things, though.  Don't cache.
+                        pass
+                    else:
+                        raise self.error_handler()
+            if _c.invalid_socket(fd):
+                fd = _c.socket(family, type, proto)
+                if _c.invalid_socket(fd):
+                    raise self.error_handler()
+                if not inheritable:
+                    sock_set_inheritable(fd, False)
         # PLAT RISCOS
         self.fd = fd
         self.family = family
@@ -630,20 +647,33 @@
         return addr, addr.addr_p, addrlen_p
 
     @jit.dont_look_inside
-    def accept(self):
+    def accept(self, inheritable=True):
         """Wait for an incoming connection.
         Return (new socket fd, client address)."""
         if self._select(False) == 1:
             raise SocketTimeout
         address, addr_p, addrlen_p = self._addrbuf()
         try:
-            newfd = _c.socketaccept(self.fd, addr_p, addrlen_p)
+            remove_inheritable = not inheritable
+            if (not inheritable and SOCK_CLOEXEC is not None
+                    and _c.HAVE_ACCEPT4
+                    and _accept4_syscall.attempt_syscall()):
+                newfd = _c.socketaccept4(self.fd, addr_p, addrlen_p,
+                                         SOCK_CLOEXEC)
+                if _accept4_syscall.fallback(newfd):
+                    newfd = _c.socketaccept(self.fd, addr_p, addrlen_p)
+                else:
+                    remove_inheritable = False
+            else:
+                newfd = _c.socketaccept(self.fd, addr_p, addrlen_p)
             addrlen = addrlen_p[0]
         finally:
             lltype.free(addrlen_p, flavor='raw')
             address.unlock()
         if _c.invalid_socket(newfd):
             raise self.error_handler()
+        if remove_inheritable:
+            sock_set_inheritable(newfd, False)
         address.addrlen = rffi.cast(lltype.Signed, addrlen)
         return (newfd, address)
 
@@ -1032,6 +1062,12 @@
     return result
 make_socket._annspecialcase_ = 'specialize:arg(4)'
 
+def sock_set_inheritable(fd, inheritable):
+    try:
+        rposix.set_inheritable(fd, inheritable)
+    except OSError as e:
+        raise CSocketError(e.errno)
+
 class SocketError(Exception):
     applevelerrcls = 'error'
     def __init__(self):
@@ -1090,7 +1126,7 @@
 
 if hasattr(_c, 'socketpair'):
     def socketpair(family=socketpair_default_family, type=SOCK_STREAM, proto=0,
-                   SocketClass=RSocket):
+                   SocketClass=RSocket, inheritable=True):
         """socketpair([family[, type[, proto]]]) -> (socket object, socket 
object)
 
         Create a pair of socket objects from the sockets returned by the 
platform
@@ -1100,18 +1136,41 @@
         """
         result = lltype.malloc(_c.socketpair_t, 2, flavor='raw')
         try:
-            res = _c.socketpair(family, type, proto, result)
+            res = -1
+            remove_inheritable = not inheritable
+            if not inheritable and SOCK_CLOEXEC is not None:
+                # Non-inheritable: we try to call socketpair() with
+                # SOCK_CLOEXEC, which may fail.  If we get EINVAL,
+                # then we fall back to the SOCK_CLOEXEC-less case.
+                res = _c.socketpair(family, type | SOCK_CLOEXEC,
+                                    proto, result)
+                if res < 0:
+                    if _c.geterrno() == EINVAL:
+                        # Linux older than 2.6.27 does not support
+                        # SOCK_CLOEXEC.  An EINVAL might be caused by
+                        # random other things, though.  Don't cache.
+                        pass
+                    else:
+                        raise last_error()
+                else:
+                    remove_inheritable = False
+            #
             if res < 0:
-                raise last_error()
+                res = _c.socketpair(family, type, proto, result)
+                if res < 0:
+                    raise last_error()
             fd0 = rffi.cast(lltype.Signed, result[0])
             fd1 = rffi.cast(lltype.Signed, result[1])
         finally:
             lltype.free(result, flavor='raw')
+        if remove_inheritable:
+            sock_set_inheritable(fd0, False)
+            sock_set_inheritable(fd1, False)
         return (make_socket(fd0, family, type, proto, SocketClass),
                 make_socket(fd1, family, type, proto, SocketClass))
 
 if _c.WIN32:
-    def dup(fd):
+    def dup(fd, inheritable=True):
         with lltype.scoped_alloc(_c.WSAPROTOCOL_INFO, zero=True) as info:
             if _c.WSADuplicateSocket(fd, rwin32.GetCurrentProcessId(), info):
                 raise last_error()
@@ -1122,15 +1181,16 @@
                 raise last_error()
             return result
 else:
-    def dup(fd):
-        return _c.dup(fd)
-
-    def fromfd(fd, family, type, proto=0, SocketClass=RSocket):
-        # Dup the fd so it and the socket can be closed independently
-        fd = _c.dup(fd)
+    def dup(fd, inheritable=True):
+        fd = rposix._dup(fd, inheritable)
         if fd < 0:
             raise last_error()
-        return make_socket(fd, family, type, proto, SocketClass)
+        return fd
+
+def fromfd(fd, family, type, proto=0, SocketClass=RSocket, inheritable=True):
+    # Dup the fd so it and the socket can be closed independently
+    fd = dup(fd, inheritable=inheritable)
+    return make_socket(fd, family, type, proto, SocketClass)
 
 def getdefaulttimeout():
     return defaults.timeout
@@ -1407,3 +1467,5 @@
     if timeout < 0.0:
         timeout = -1.0
     defaults.timeout = timeout
+
+_accept4_syscall = rposix.ENoSysCache()
diff --git a/rpython/rlib/rsre/rsre_char.py b/rpython/rlib/rsre/rsre_char.py
--- a/rpython/rlib/rsre/rsre_char.py
+++ b/rpython/rlib/rsre/rsre_char.py
@@ -23,16 +23,15 @@
 
 #### Constants
 
-# Identifying as _sre from Python 2.3 and onwards (at least up to 2.7)
-MAGIC = 20031017
-
 if sys.maxint > 2**32:
     MAXREPEAT = int(2**32 - 1)
+    MAXGROUPS = int(2**31 - 1)
 else:
     MAXREPEAT = int(2**31 - 1)
+    MAXGROUPS = int((2**31 / sys.maxint / 2) - 1)
 
 # In _sre.c this is bytesize of the code word type of the C implementation.
-# There it's 2 for normal Python builds and more for wide unicode builds 
(large 
+# There it's 2 for normal Python builds and more for wide unicode builds (large
 # enough to hold a 32-bit UCS-4 encoded character). Since here in pure Python
 # we only see re bytecodes as Python longs, we shouldn't have to care about the
 # codesize. But sre_compile will compile some stuff differently depending on 
the
diff --git a/rpython/rlib/rstruct/nativefmttable.py 
b/rpython/rlib/rstruct/nativefmttable.py
--- a/rpython/rlib/rstruct/nativefmttable.py
+++ b/rpython/rlib/rstruct/nativefmttable.py
@@ -66,11 +66,13 @@
                'i': 'signed int',
                'l': 'signed long',
                'q': 'signed long long',
+               'n': 'ssize_t',
                'B': 'unsigned char',
                'H': 'unsigned short',
                'I': 'unsigned int',
                'L': 'unsigned long',
                'Q': 'unsigned long long',
+               'N': 'size_t',
                'P': 'char *',
                'f': 'float',
                'd': 'double',
@@ -78,8 +80,11 @@
                }
 
     pre_include_bits = ["""
+        #include <sys/types.h>
         #ifdef _MSC_VER
         #define _Bool char
+        typedef int ssize_t; /* XXX fixme for 64 bit*/
+        typedef unsigned int size_t; /* XXX fixme for 64 bit*/
         #endif"""]
     field_names = dict.fromkeys(INSPECT)
     for fmtchar, ctype in INSPECT.iteritems():
diff --git a/rpython/rlib/rthread.py b/rpython/rlib/rthread.py
--- a/rpython/rlib/rthread.py
+++ b/rpython/rlib/rthread.py
@@ -22,6 +22,16 @@
     include_dirs = [translator_c_dir],
 )
 
+class CConfig:
+    _compilation_info_ = eci
+    RPYTHREAD_NAME = rffi_platform.DefinedConstantString('RPYTHREAD_NAME')
+    USE_SEMAPHORES = rffi_platform.Defined('USE_SEMAPHORES')
+    CS_GNU_LIBPTHREAD_VERSION = rffi_platform.DefinedConstantInteger(
+        '_CS_GNU_LIBPTHREAD_VERSION')
+cconfig = rffi_platform.configure(CConfig)
+globals().update(cconfig)
+
+
 def llexternal(name, args, result, **kwds):
     kwds.setdefault('sandboxsafe', True)
     return rffi.llexternal(name, args, result, compilation_info=eci,
diff --git a/rpython/rlib/runicode.py b/rpython/rlib/runicode.py
--- a/rpython/rlib/runicode.py
+++ b/rpython/rlib/runicode.py
@@ -1,7 +1,7 @@
 import sys
 from rpython.rlib.objectmodel import specialize, we_are_translated
 from rpython.rlib.rstring import StringBuilder, UnicodeBuilder
-from rpython.rlib.rarithmetic import r_uint, intmask
+from rpython.rlib.rarithmetic import r_uint, intmask, widen
 from rpython.rlib.unicodedata import unicodedb
 from rpython.rtyper.lltypesystem import lltype, rffi
 from rpython.rlib import jit
@@ -74,7 +74,7 @@
 else:
     def code_to_unichr(code):
         # generate surrogates for large codes
-        return unichr_returns_surrogate(code)
+        return unichr_returns_surrogate(widen(code))
 
 def _STORECHAR(result, CH, byteorder):
     hi = chr(((CH) >> 8) & 0xff)
@@ -1381,7 +1381,7 @@
                 result.append(STR('\\\\'))
 
             # Map non-printable or non-ascii to '\xhh' or '\uhhhh'
-            elif pass_printable and not unicodedb.isprintable(oc):
+            elif pass_printable and not (oc <= 0x10ffff and 
unicodedb.isprintable(oc)):
                 char_escape_helper(result, oc)
             elif not pass_printable and (oc < 32 or oc >= 0x7F):
                 char_escape_helper(result, oc)
diff --git a/rpython/rlib/rwin32.py b/rpython/rlib/rwin32.py
--- a/rpython/rlib/rwin32.py
+++ b/rpython/rlib/rwin32.py
@@ -46,6 +46,7 @@
         LPWSTR = rffi_platform.SimpleType("LPWSTR", rffi.CWCHARP)
         LPCWSTR = rffi_platform.SimpleType("LPCWSTR", rffi.CWCHARP)
         LPDWORD = rffi_platform.SimpleType("LPDWORD", rffi.UINTP)
+        LPBOOL = rffi_platform.SimpleType("LPBOOL", rffi.LONGP)
         SIZE_T = rffi_platform.SimpleType("SIZE_T", rffi.SIZE_T)
         ULONG_PTR = rffi_platform.SimpleType("ULONG_PTR", rffi.ULONG)
 
@@ -58,6 +59,24 @@
         SYSTEMTIME = rffi_platform.Struct('SYSTEMTIME',
                                           [])
 
+        Struct = rffi_platform.Struct
+        COORD = Struct("COORD",
+                       [("X", rffi.SHORT),
+                        ("Y", rffi.SHORT)])
+
+        SMALL_RECT = Struct("SMALL_RECT",
+                            [("Left", rffi.SHORT),
+                             ("Top", rffi.SHORT),
+                             ("Right", rffi.SHORT),
+                             ("Bottom", rffi.SHORT)])
+
+        CONSOLE_SCREEN_BUFFER_INFO = Struct("CONSOLE_SCREEN_BUFFER_INFO",
+                                            [("dwSize", COORD),
+                                             ("dwCursorPosition", COORD),
+                                             ("wAttributes", WORD.ctype_hint),
+                                             ("srWindow", SMALL_RECT),
+                                             ("dwMaximumWindowSize", COORD)])
+
         OSVERSIONINFOEX = rffi_platform.Struct(
             'OSVERSIONINFOEX',
             [('dwOSVersionInfoSize', rffi.UINT),
@@ -92,7 +111,8 @@
                        PROCESS_VM_WRITE
                        CTRL_C_EVENT CTRL_BREAK_EVENT
                        MB_ERR_INVALID_CHARS ERROR_NO_UNICODE_TRANSLATION
-                       WC_NO_BEST_FIT_CHARS
+                       WC_NO_BEST_FIT_CHARS STD_INPUT_HANDLE STD_OUTPUT_HANDLE
+                       STD_ERROR_HANDLE
                     """
         from rpython.translator.platform import host_factory
         static_platform = host_factory()
@@ -443,3 +463,13 @@
         return rffi.cast(lltype.Signed, _GetConsoleOutputCP())
 
     _wenviron_items, _wgetenv, _wputenv = make_env_impls(win32=True)
+
+
+    _GetStdHandle = winexternal(
+        'GetStdHandle', [DWORD], HANDLE)
+
+    def GetStdHandle(handle_id):
+        return _GetStdHandle(handle_id)
+    CONSOLE_SCREEN_BUFFER_INFO_P = lltype.Ptr(CONSOLE_SCREEN_BUFFER_INFO)
+    GetConsoleScreenBufferInfo = winexternal(
+        "GetConsoleScreenBufferInfo", [HANDLE, CONSOLE_SCREEN_BUFFER_INFO_P], 
BOOL)
diff --git a/rpython/rlib/rzlib.py b/rpython/rlib/rzlib.py
--- a/rpython/rlib/rzlib.py
+++ b/rpython/rlib/rzlib.py
@@ -324,7 +324,8 @@
     return data
 
 
-def decompress(stream, data, flush=Z_SYNC_FLUSH, max_length=sys.maxint):
+def decompress(stream, data, flush=Z_SYNC_FLUSH, max_length=sys.maxint,
+               zdict=None):
     """
     Feed more data into an inflate stream.  Returns a tuple (string,
     finished, unused_data_length).  The string contains (a part of) the
@@ -350,7 +351,7 @@
         should_finish = False
     while_doing = "while decompressing data"
     data, err, avail_in = _operate(stream, data, flush, max_length, _inflate,
-                                   while_doing)
+                                   while_doing, zdict=zdict)
     if should_finish:
         # detect incomplete input
         rffi.setintfield(stream, 'c_avail_in', 0)
@@ -361,7 +362,7 @@
     return data, finished, avail_in
 
 
-def _operate(stream, data, flush, max_length, cfunc, while_doing):
+def _operate(stream, data, flush, max_length, cfunc, while_doing, zdict=None):
     """Common code for compress() and decompress().
     """
     # Prepare the input buffer for the stream
@@ -388,6 +389,10 @@
                 max_length -= bufsize
                 rffi.setintfield(stream, 'c_avail_out', bufsize)
                 err = cfunc(stream, flush)
+                if err == Z_NEED_DICT and zdict is not None:
+                    inflateSetDictionary(stream, zdict)
+                    # repeat the call to inflate
+                    err = cfunc(stream, flush)
                 if err == Z_OK or err == Z_STREAM_END:
                     # accumulate data into 'result'
                     avail_out = rffi.cast(lltype.Signed, stream.c_avail_out)
diff --git a/rpython/rlib/test/test_rbigint.py 
b/rpython/rlib/test/test_rbigint.py
--- a/rpython/rlib/test/test_rbigint.py
+++ b/rpython/rlib/test/test_rbigint.py
@@ -674,6 +674,11 @@
                 else:
                     assert ulps_check(l, math.log(op)) is None
 
+    def test_log2(self):
+        assert rbigint.fromlong(1).log(2.0) == 0.0
+        assert rbigint.fromlong(2).log(2.0) == 1.0
+        assert rbigint.fromlong(2**1023).log(2.0) == 1023.0
+
 class TestInternalFunctions(object):
     def test__inplace_divrem1(self):
         # signs are not handled in the helpers!
diff --git a/rpython/rlib/test/test_rposix.py b/rpython/rlib/test/test_rposix.py
--- a/rpython/rlib/test/test_rposix.py
+++ b/rpython/rlib/test/test_rposix.py
@@ -589,3 +589,18 @@
     assert rposix.get_inheritable(fd1) == False
     os.close(fd1)
     os.close(fd2)
+
+def test_SetNonInheritableCache():
+    cache = rposix.SetNonInheritableCache()
+    fd1, fd2 = os.pipe()
+    assert rposix.get_inheritable(fd1) == True
+    assert rposix.get_inheritable(fd1) == True
+    assert cache.cached_inheritable == -1
+    cache.set_non_inheritable(fd1)
+    assert cache.cached_inheritable == 1
+    cache.set_non_inheritable(fd2)
+    assert cache.cached_inheritable == 1
+    assert rposix.get_inheritable(fd1) == False
+    assert rposix.get_inheritable(fd1) == False
+    os.close(fd1)
+    os.close(fd2)
diff --git a/rpython/rlib/test/test_rsocket.py 
b/rpython/rlib/test/test_rsocket.py
--- a/rpython/rlib/test/test_rsocket.py
+++ b/rpython/rlib/test/test_rsocket.py
@@ -119,6 +119,16 @@
     s1.close()
     s2.close()
 
+def test_socketpair_inheritable():
+    if sys.platform == "win32":
+        py.test.skip('No socketpair on Windows')
+    for inh in [False, True]:
+        s1, s2 = socketpair(inheritable=inh)
+        assert rposix.get_inheritable(s1.fd) == inh
+        assert rposix.get_inheritable(s2.fd) == inh
+        s1.close()
+        s2.close()
+
 def test_socketpair_recvinto_1():
     class Buffer:
         def setslice(self, start, string):
@@ -378,6 +388,12 @@
     s1.close()
     s2.close()
 
+def test_inheritable():
+    for inh in [False, True]:
+        s1 = RSocket(inheritable=inh)
+        assert rposix.get_inheritable(s1.fd) == inh
+        s1.close()
+
 def test_getaddrinfo_http():
     lst = getaddrinfo('localhost', 'http')
     assert isinstance(lst, list)
diff --git a/rpython/rlib/unicodedata/CaseFolding-3.2.0.txt 
b/rpython/rlib/unicodedata/CaseFolding-3.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/CaseFolding-3.2.0.txt
@@ -0,0 +1,912 @@
+# CaseFolding-3.2.0.txt
+# Date: 2002-03-22,20:54:33 GMT [MD]
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character 
Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full 
case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and 
"Ma&#65533;e" to match.
+#
+# NOTE: case folding does not preserve normalization formats!
+#
+# For information on case folding, see
+# UTR #21 Case Mappings, at http://www.unicode.org/unicode/reports/tr21/
+#
+# 
================================================================================
+# Format
+# 
================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full 
mappings.
+# F: full case folding, mappings that cause strings to grow in length. 
Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+#    - For non-Turkic languages, this mapping is normally not used.
+#    - For Turkic languages (tr, az), this mapping can be used instead of the 
normal mapping for these characters.
+#
+# Usage:
+#  A. To do a simple case folding, use the mappings with status C + S.
+#  B. To do a full case folding, use the mappings with status C + F.
+#
+#    The mappings with status T can be used or omitted depending on the 
desired case-folding
+#    behavior. (The default option is to exclude them.)
+#
+# =================================================================
+
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0049; T; 0131; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
+00C3; C; 00E3; # LATIN CAPITAL LETTER A WITH TILDE
+00C4; C; 00E4; # LATIN CAPITAL LETTER A WITH DIAERESIS
+00C5; C; 00E5; # LATIN CAPITAL LETTER A WITH RING ABOVE
+00C6; C; 00E6; # LATIN CAPITAL LETTER AE
+00C7; C; 00E7; # LATIN CAPITAL LETTER C WITH CEDILLA
+00C8; C; 00E8; # LATIN CAPITAL LETTER E WITH GRAVE
+00C9; C; 00E9; # LATIN CAPITAL LETTER E WITH ACUTE
+00CA; C; 00EA; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX
+00CB; C; 00EB; # LATIN CAPITAL LETTER E WITH DIAERESIS
+00CC; C; 00EC; # LATIN CAPITAL LETTER I WITH GRAVE
+00CD; C; 00ED; # LATIN CAPITAL LETTER I WITH ACUTE
+00CE; C; 00EE; # LATIN CAPITAL LETTER I WITH CIRCUMFLEX
+00CF; C; 00EF; # LATIN CAPITAL LETTER I WITH DIAERESIS
+00D0; C; 00F0; # LATIN CAPITAL LETTER ETH
+00D1; C; 00F1; # LATIN CAPITAL LETTER N WITH TILDE
+00D2; C; 00F2; # LATIN CAPITAL LETTER O WITH GRAVE
+00D3; C; 00F3; # LATIN CAPITAL LETTER O WITH ACUTE
+00D4; C; 00F4; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX
+00D5; C; 00F5; # LATIN CAPITAL LETTER O WITH TILDE
+00D6; C; 00F6; # LATIN CAPITAL LETTER O WITH DIAERESIS
+00D8; C; 00F8; # LATIN CAPITAL LETTER O WITH STROKE
+00D9; C; 00F9; # LATIN CAPITAL LETTER U WITH GRAVE
+00DA; C; 00FA; # LATIN CAPITAL LETTER U WITH ACUTE
+00DB; C; 00FB; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX
+00DC; C; 00FC; # LATIN CAPITAL LETTER U WITH DIAERESIS
+00DD; C; 00FD; # LATIN CAPITAL LETTER Y WITH ACUTE
+00DE; C; 00FE; # LATIN CAPITAL LETTER THORN
+00DF; F; 0073 0073; # LATIN SMALL LETTER SHARP S
+0100; C; 0101; # LATIN CAPITAL LETTER A WITH MACRON
+0102; C; 0103; # LATIN CAPITAL LETTER A WITH BREVE
+0104; C; 0105; # LATIN CAPITAL LETTER A WITH OGONEK
+0106; C; 0107; # LATIN CAPITAL LETTER C WITH ACUTE
+0108; C; 0109; # LATIN CAPITAL LETTER C WITH CIRCUMFLEX
+010A; C; 010B; # LATIN CAPITAL LETTER C WITH DOT ABOVE
+010C; C; 010D; # LATIN CAPITAL LETTER C WITH CARON
+010E; C; 010F; # LATIN CAPITAL LETTER D WITH CARON
+0110; C; 0111; # LATIN CAPITAL LETTER D WITH STROKE
+0112; C; 0113; # LATIN CAPITAL LETTER E WITH MACRON
+0114; C; 0115; # LATIN CAPITAL LETTER E WITH BREVE
+0116; C; 0117; # LATIN CAPITAL LETTER E WITH DOT ABOVE
+0118; C; 0119; # LATIN CAPITAL LETTER E WITH OGONEK
+011A; C; 011B; # LATIN CAPITAL LETTER E WITH CARON
+011C; C; 011D; # LATIN CAPITAL LETTER G WITH CIRCUMFLEX
+011E; C; 011F; # LATIN CAPITAL LETTER G WITH BREVE
+0120; C; 0121; # LATIN CAPITAL LETTER G WITH DOT ABOVE
+0122; C; 0123; # LATIN CAPITAL LETTER G WITH CEDILLA
+0124; C; 0125; # LATIN CAPITAL LETTER H WITH CIRCUMFLEX
+0126; C; 0127; # LATIN CAPITAL LETTER H WITH STROKE
+0128; C; 0129; # LATIN CAPITAL LETTER I WITH TILDE
+012A; C; 012B; # LATIN CAPITAL LETTER I WITH MACRON
+012C; C; 012D; # LATIN CAPITAL LETTER I WITH BREVE
+012E; C; 012F; # LATIN CAPITAL LETTER I WITH OGONEK
+0130; F; 0069 0307; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0130; T; 0069; # LATIN CAPITAL LETTER I WITH DOT ABOVE
+0132; C; 0133; # LATIN CAPITAL LIGATURE IJ
+0134; C; 0135; # LATIN CAPITAL LETTER J WITH CIRCUMFLEX
+0136; C; 0137; # LATIN CAPITAL LETTER K WITH CEDILLA
+0139; C; 013A; # LATIN CAPITAL LETTER L WITH ACUTE
+013B; C; 013C; # LATIN CAPITAL LETTER L WITH CEDILLA
+013D; C; 013E; # LATIN CAPITAL LETTER L WITH CARON
+013F; C; 0140; # LATIN CAPITAL LETTER L WITH MIDDLE DOT
+0141; C; 0142; # LATIN CAPITAL LETTER L WITH STROKE
+0143; C; 0144; # LATIN CAPITAL LETTER N WITH ACUTE
+0145; C; 0146; # LATIN CAPITAL LETTER N WITH CEDILLA
+0147; C; 0148; # LATIN CAPITAL LETTER N WITH CARON
+0149; F; 02BC 006E; # LATIN SMALL LETTER N PRECEDED BY APOSTROPHE
+014A; C; 014B; # LATIN CAPITAL LETTER ENG
+014C; C; 014D; # LATIN CAPITAL LETTER O WITH MACRON
+014E; C; 014F; # LATIN CAPITAL LETTER O WITH BREVE
+0150; C; 0151; # LATIN CAPITAL LETTER O WITH DOUBLE ACUTE
+0152; C; 0153; # LATIN CAPITAL LIGATURE OE
+0154; C; 0155; # LATIN CAPITAL LETTER R WITH ACUTE
+0156; C; 0157; # LATIN CAPITAL LETTER R WITH CEDILLA
+0158; C; 0159; # LATIN CAPITAL LETTER R WITH CARON
+015A; C; 015B; # LATIN CAPITAL LETTER S WITH ACUTE
+015C; C; 015D; # LATIN CAPITAL LETTER S WITH CIRCUMFLEX
+015E; C; 015F; # LATIN CAPITAL LETTER S WITH CEDILLA
+0160; C; 0161; # LATIN CAPITAL LETTER S WITH CARON
+0162; C; 0163; # LATIN CAPITAL LETTER T WITH CEDILLA
+0164; C; 0165; # LATIN CAPITAL LETTER T WITH CARON
+0166; C; 0167; # LATIN CAPITAL LETTER T WITH STROKE
+0168; C; 0169; # LATIN CAPITAL LETTER U WITH TILDE
+016A; C; 016B; # LATIN CAPITAL LETTER U WITH MACRON
+016C; C; 016D; # LATIN CAPITAL LETTER U WITH BREVE
+016E; C; 016F; # LATIN CAPITAL LETTER U WITH RING ABOVE
+0170; C; 0171; # LATIN CAPITAL LETTER U WITH DOUBLE ACUTE
+0172; C; 0173; # LATIN CAPITAL LETTER U WITH OGONEK
+0174; C; 0175; # LATIN CAPITAL LETTER W WITH CIRCUMFLEX
+0176; C; 0177; # LATIN CAPITAL LETTER Y WITH CIRCUMFLEX
+0178; C; 00FF; # LATIN CAPITAL LETTER Y WITH DIAERESIS
+0179; C; 017A; # LATIN CAPITAL LETTER Z WITH ACUTE
+017B; C; 017C; # LATIN CAPITAL LETTER Z WITH DOT ABOVE
+017D; C; 017E; # LATIN CAPITAL LETTER Z WITH CARON
+017F; C; 0073; # LATIN SMALL LETTER LONG S
+0181; C; 0253; # LATIN CAPITAL LETTER B WITH HOOK
+0182; C; 0183; # LATIN CAPITAL LETTER B WITH TOPBAR
+0184; C; 0185; # LATIN CAPITAL LETTER TONE SIX
+0186; C; 0254; # LATIN CAPITAL LETTER OPEN O
+0187; C; 0188; # LATIN CAPITAL LETTER C WITH HOOK
+0189; C; 0256; # LATIN CAPITAL LETTER AFRICAN D
+018A; C; 0257; # LATIN CAPITAL LETTER D WITH HOOK
+018B; C; 018C; # LATIN CAPITAL LETTER D WITH TOPBAR
+018E; C; 01DD; # LATIN CAPITAL LETTER REVERSED E
+018F; C; 0259; # LATIN CAPITAL LETTER SCHWA
+0190; C; 025B; # LATIN CAPITAL LETTER OPEN E
+0191; C; 0192; # LATIN CAPITAL LETTER F WITH HOOK
+0193; C; 0260; # LATIN CAPITAL LETTER G WITH HOOK
+0194; C; 0263; # LATIN CAPITAL LETTER GAMMA
+0196; C; 0269; # LATIN CAPITAL LETTER IOTA
+0197; C; 0268; # LATIN CAPITAL LETTER I WITH STROKE
+0198; C; 0199; # LATIN CAPITAL LETTER K WITH HOOK
+019C; C; 026F; # LATIN CAPITAL LETTER TURNED M
+019D; C; 0272; # LATIN CAPITAL LETTER N WITH LEFT HOOK
+019F; C; 0275; # LATIN CAPITAL LETTER O WITH MIDDLE TILDE
+01A0; C; 01A1; # LATIN CAPITAL LETTER O WITH HORN
+01A2; C; 01A3; # LATIN CAPITAL LETTER OI
+01A4; C; 01A5; # LATIN CAPITAL LETTER P WITH HOOK
+01A6; C; 0280; # LATIN LETTER YR
+01A7; C; 01A8; # LATIN CAPITAL LETTER TONE TWO
+01A9; C; 0283; # LATIN CAPITAL LETTER ESH
+01AC; C; 01AD; # LATIN CAPITAL LETTER T WITH HOOK
+01AE; C; 0288; # LATIN CAPITAL LETTER T WITH RETROFLEX HOOK
+01AF; C; 01B0; # LATIN CAPITAL LETTER U WITH HORN
+01B1; C; 028A; # LATIN CAPITAL LETTER UPSILON
+01B2; C; 028B; # LATIN CAPITAL LETTER V WITH HOOK
+01B3; C; 01B4; # LATIN CAPITAL LETTER Y WITH HOOK
+01B5; C; 01B6; # LATIN CAPITAL LETTER Z WITH STROKE
+01B7; C; 0292; # LATIN CAPITAL LETTER EZH
+01B8; C; 01B9; # LATIN CAPITAL LETTER EZH REVERSED
+01BC; C; 01BD; # LATIN CAPITAL LETTER TONE FIVE
+01C4; C; 01C6; # LATIN CAPITAL LETTER DZ WITH CARON
+01C5; C; 01C6; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON
+01C7; C; 01C9; # LATIN CAPITAL LETTER LJ
+01C8; C; 01C9; # LATIN CAPITAL LETTER L WITH SMALL LETTER J
+01CA; C; 01CC; # LATIN CAPITAL LETTER NJ
+01CB; C; 01CC; # LATIN CAPITAL LETTER N WITH SMALL LETTER J
+01CD; C; 01CE; # LATIN CAPITAL LETTER A WITH CARON
+01CF; C; 01D0; # LATIN CAPITAL LETTER I WITH CARON
+01D1; C; 01D2; # LATIN CAPITAL LETTER O WITH CARON
+01D3; C; 01D4; # LATIN CAPITAL LETTER U WITH CARON
+01D5; C; 01D6; # LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON
+01D7; C; 01D8; # LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE
+01D9; C; 01DA; # LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON
+01DB; C; 01DC; # LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE
+01DE; C; 01DF; # LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON
+01E0; C; 01E1; # LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON
+01E2; C; 01E3; # LATIN CAPITAL LETTER AE WITH MACRON
+01E4; C; 01E5; # LATIN CAPITAL LETTER G WITH STROKE
+01E6; C; 01E7; # LATIN CAPITAL LETTER G WITH CARON
+01E8; C; 01E9; # LATIN CAPITAL LETTER K WITH CARON
+01EA; C; 01EB; # LATIN CAPITAL LETTER O WITH OGONEK
+01EC; C; 01ED; # LATIN CAPITAL LETTER O WITH OGONEK AND MACRON
+01EE; C; 01EF; # LATIN CAPITAL LETTER EZH WITH CARON
+01F0; F; 006A 030C; # LATIN SMALL LETTER J WITH CARON
+01F1; C; 01F3; # LATIN CAPITAL LETTER DZ
+01F2; C; 01F3; # LATIN CAPITAL LETTER D WITH SMALL LETTER Z
+01F4; C; 01F5; # LATIN CAPITAL LETTER G WITH ACUTE
+01F6; C; 0195; # LATIN CAPITAL LETTER HWAIR
+01F7; C; 01BF; # LATIN CAPITAL LETTER WYNN
+01F8; C; 01F9; # LATIN CAPITAL LETTER N WITH GRAVE
+01FA; C; 01FB; # LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE
+01FC; C; 01FD; # LATIN CAPITAL LETTER AE WITH ACUTE
+01FE; C; 01FF; # LATIN CAPITAL LETTER O WITH STROKE AND ACUTE
+0200; C; 0201; # LATIN CAPITAL LETTER A WITH DOUBLE GRAVE
+0202; C; 0203; # LATIN CAPITAL LETTER A WITH INVERTED BREVE
+0204; C; 0205; # LATIN CAPITAL LETTER E WITH DOUBLE GRAVE
+0206; C; 0207; # LATIN CAPITAL LETTER E WITH INVERTED BREVE
+0208; C; 0209; # LATIN CAPITAL LETTER I WITH DOUBLE GRAVE
+020A; C; 020B; # LATIN CAPITAL LETTER I WITH INVERTED BREVE
+020C; C; 020D; # LATIN CAPITAL LETTER O WITH DOUBLE GRAVE
+020E; C; 020F; # LATIN CAPITAL LETTER O WITH INVERTED BREVE
+0210; C; 0211; # LATIN CAPITAL LETTER R WITH DOUBLE GRAVE
+0212; C; 0213; # LATIN CAPITAL LETTER R WITH INVERTED BREVE
+0214; C; 0215; # LATIN CAPITAL LETTER U WITH DOUBLE GRAVE
+0216; C; 0217; # LATIN CAPITAL LETTER U WITH INVERTED BREVE
+0218; C; 0219; # LATIN CAPITAL LETTER S WITH COMMA BELOW
+021A; C; 021B; # LATIN CAPITAL LETTER T WITH COMMA BELOW
+021C; C; 021D; # LATIN CAPITAL LETTER YOGH
+021E; C; 021F; # LATIN CAPITAL LETTER H WITH CARON
+0220; C; 019E; # LATIN CAPITAL LETTER N WITH LONG RIGHT LEG
+0222; C; 0223; # LATIN CAPITAL LETTER OU
+0224; C; 0225; # LATIN CAPITAL LETTER Z WITH HOOK
+0226; C; 0227; # LATIN CAPITAL LETTER A WITH DOT ABOVE
+0228; C; 0229; # LATIN CAPITAL LETTER E WITH CEDILLA
+022A; C; 022B; # LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON
+022C; C; 022D; # LATIN CAPITAL LETTER O WITH TILDE AND MACRON
+022E; C; 022F; # LATIN CAPITAL LETTER O WITH DOT ABOVE
+0230; C; 0231; # LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON
+0232; C; 0233; # LATIN CAPITAL LETTER Y WITH MACRON
+0345; C; 03B9; # COMBINING GREEK YPOGEGRAMMENI
+0386; C; 03AC; # GREEK CAPITAL LETTER ALPHA WITH TONOS
+0388; C; 03AD; # GREEK CAPITAL LETTER EPSILON WITH TONOS
+0389; C; 03AE; # GREEK CAPITAL LETTER ETA WITH TONOS
+038A; C; 03AF; # GREEK CAPITAL LETTER IOTA WITH TONOS
+038C; C; 03CC; # GREEK CAPITAL LETTER OMICRON WITH TONOS
+038E; C; 03CD; # GREEK CAPITAL LETTER UPSILON WITH TONOS
+038F; C; 03CE; # GREEK CAPITAL LETTER OMEGA WITH TONOS
+0390; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND TONOS
+0391; C; 03B1; # GREEK CAPITAL LETTER ALPHA
+0392; C; 03B2; # GREEK CAPITAL LETTER BETA
+0393; C; 03B3; # GREEK CAPITAL LETTER GAMMA
+0394; C; 03B4; # GREEK CAPITAL LETTER DELTA
+0395; C; 03B5; # GREEK CAPITAL LETTER EPSILON
+0396; C; 03B6; # GREEK CAPITAL LETTER ZETA
+0397; C; 03B7; # GREEK CAPITAL LETTER ETA
+0398; C; 03B8; # GREEK CAPITAL LETTER THETA
+0399; C; 03B9; # GREEK CAPITAL LETTER IOTA
+039A; C; 03BA; # GREEK CAPITAL LETTER KAPPA
+039B; C; 03BB; # GREEK CAPITAL LETTER LAMDA
+039C; C; 03BC; # GREEK CAPITAL LETTER MU
+039D; C; 03BD; # GREEK CAPITAL LETTER NU
+039E; C; 03BE; # GREEK CAPITAL LETTER XI
+039F; C; 03BF; # GREEK CAPITAL LETTER OMICRON
+03A0; C; 03C0; # GREEK CAPITAL LETTER PI
+03A1; C; 03C1; # GREEK CAPITAL LETTER RHO
+03A3; C; 03C3; # GREEK CAPITAL LETTER SIGMA
+03A4; C; 03C4; # GREEK CAPITAL LETTER TAU
+03A5; C; 03C5; # GREEK CAPITAL LETTER UPSILON
+03A6; C; 03C6; # GREEK CAPITAL LETTER PHI
+03A7; C; 03C7; # GREEK CAPITAL LETTER CHI
+03A8; C; 03C8; # GREEK CAPITAL LETTER PSI
+03A9; C; 03C9; # GREEK CAPITAL LETTER OMEGA
+03AA; C; 03CA; # GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
+03AB; C; 03CB; # GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
+03B0; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND TONOS
+03C2; C; 03C3; # GREEK SMALL LETTER FINAL SIGMA
+03D0; C; 03B2; # GREEK BETA SYMBOL
+03D1; C; 03B8; # GREEK THETA SYMBOL
+03D5; C; 03C6; # GREEK PHI SYMBOL
+03D6; C; 03C0; # GREEK PI SYMBOL
+03D8; C; 03D9; # GREEK LETTER ARCHAIC KOPPA
+03DA; C; 03DB; # GREEK LETTER STIGMA
+03DC; C; 03DD; # GREEK LETTER DIGAMMA
+03DE; C; 03DF; # GREEK LETTER KOPPA
+03E0; C; 03E1; # GREEK LETTER SAMPI
+03E2; C; 03E3; # COPTIC CAPITAL LETTER SHEI
+03E4; C; 03E5; # COPTIC CAPITAL LETTER FEI
+03E6; C; 03E7; # COPTIC CAPITAL LETTER KHEI
+03E8; C; 03E9; # COPTIC CAPITAL LETTER HORI
+03EA; C; 03EB; # COPTIC CAPITAL LETTER GANGIA
+03EC; C; 03ED; # COPTIC CAPITAL LETTER SHIMA
+03EE; C; 03EF; # COPTIC CAPITAL LETTER DEI
+03F0; C; 03BA; # GREEK KAPPA SYMBOL
+03F1; C; 03C1; # GREEK RHO SYMBOL
+03F2; C; 03C3; # GREEK LUNATE SIGMA SYMBOL
+03F4; C; 03B8; # GREEK CAPITAL THETA SYMBOL
+03F5; C; 03B5; # GREEK LUNATE EPSILON SYMBOL
+0400; C; 0450; # CYRILLIC CAPITAL LETTER IE WITH GRAVE
+0401; C; 0451; # CYRILLIC CAPITAL LETTER IO
+0402; C; 0452; # CYRILLIC CAPITAL LETTER DJE
+0403; C; 0453; # CYRILLIC CAPITAL LETTER GJE
+0404; C; 0454; # CYRILLIC CAPITAL LETTER UKRAINIAN IE
+0405; C; 0455; # CYRILLIC CAPITAL LETTER DZE
+0406; C; 0456; # CYRILLIC CAPITAL LETTER BYELORUSSIAN-UKRAINIAN I
+0407; C; 0457; # CYRILLIC CAPITAL LETTER YI
+0408; C; 0458; # CYRILLIC CAPITAL LETTER JE
+0409; C; 0459; # CYRILLIC CAPITAL LETTER LJE
+040A; C; 045A; # CYRILLIC CAPITAL LETTER NJE
+040B; C; 045B; # CYRILLIC CAPITAL LETTER TSHE
+040C; C; 045C; # CYRILLIC CAPITAL LETTER KJE
+040D; C; 045D; # CYRILLIC CAPITAL LETTER I WITH GRAVE
+040E; C; 045E; # CYRILLIC CAPITAL LETTER SHORT U
+040F; C; 045F; # CYRILLIC CAPITAL LETTER DZHE
+0410; C; 0430; # CYRILLIC CAPITAL LETTER A
+0411; C; 0431; # CYRILLIC CAPITAL LETTER BE
+0412; C; 0432; # CYRILLIC CAPITAL LETTER VE
+0413; C; 0433; # CYRILLIC CAPITAL LETTER GHE
+0414; C; 0434; # CYRILLIC CAPITAL LETTER DE
+0415; C; 0435; # CYRILLIC CAPITAL LETTER IE
+0416; C; 0436; # CYRILLIC CAPITAL LETTER ZHE
+0417; C; 0437; # CYRILLIC CAPITAL LETTER ZE
+0418; C; 0438; # CYRILLIC CAPITAL LETTER I
+0419; C; 0439; # CYRILLIC CAPITAL LETTER SHORT I
+041A; C; 043A; # CYRILLIC CAPITAL LETTER KA
+041B; C; 043B; # CYRILLIC CAPITAL LETTER EL
+041C; C; 043C; # CYRILLIC CAPITAL LETTER EM
+041D; C; 043D; # CYRILLIC CAPITAL LETTER EN
+041E; C; 043E; # CYRILLIC CAPITAL LETTER O
+041F; C; 043F; # CYRILLIC CAPITAL LETTER PE
+0420; C; 0440; # CYRILLIC CAPITAL LETTER ER
+0421; C; 0441; # CYRILLIC CAPITAL LETTER ES
+0422; C; 0442; # CYRILLIC CAPITAL LETTER TE
+0423; C; 0443; # CYRILLIC CAPITAL LETTER U
+0424; C; 0444; # CYRILLIC CAPITAL LETTER EF
+0425; C; 0445; # CYRILLIC CAPITAL LETTER HA
+0426; C; 0446; # CYRILLIC CAPITAL LETTER TSE
+0427; C; 0447; # CYRILLIC CAPITAL LETTER CHE
+0428; C; 0448; # CYRILLIC CAPITAL LETTER SHA
+0429; C; 0449; # CYRILLIC CAPITAL LETTER SHCHA
+042A; C; 044A; # CYRILLIC CAPITAL LETTER HARD SIGN
+042B; C; 044B; # CYRILLIC CAPITAL LETTER YERU
+042C; C; 044C; # CYRILLIC CAPITAL LETTER SOFT SIGN
+042D; C; 044D; # CYRILLIC CAPITAL LETTER E
+042E; C; 044E; # CYRILLIC CAPITAL LETTER YU
+042F; C; 044F; # CYRILLIC CAPITAL LETTER YA
+0460; C; 0461; # CYRILLIC CAPITAL LETTER OMEGA
+0462; C; 0463; # CYRILLIC CAPITAL LETTER YAT
+0464; C; 0465; # CYRILLIC CAPITAL LETTER IOTIFIED E
+0466; C; 0467; # CYRILLIC CAPITAL LETTER LITTLE YUS
+0468; C; 0469; # CYRILLIC CAPITAL LETTER IOTIFIED LITTLE YUS
+046A; C; 046B; # CYRILLIC CAPITAL LETTER BIG YUS
+046C; C; 046D; # CYRILLIC CAPITAL LETTER IOTIFIED BIG YUS
+046E; C; 046F; # CYRILLIC CAPITAL LETTER KSI
+0470; C; 0471; # CYRILLIC CAPITAL LETTER PSI
+0472; C; 0473; # CYRILLIC CAPITAL LETTER FITA
+0474; C; 0475; # CYRILLIC CAPITAL LETTER IZHITSA
+0476; C; 0477; # CYRILLIC CAPITAL LETTER IZHITSA WITH DOUBLE GRAVE ACCENT
+0478; C; 0479; # CYRILLIC CAPITAL LETTER UK
+047A; C; 047B; # CYRILLIC CAPITAL LETTER ROUND OMEGA
+047C; C; 047D; # CYRILLIC CAPITAL LETTER OMEGA WITH TITLO
+047E; C; 047F; # CYRILLIC CAPITAL LETTER OT
+0480; C; 0481; # CYRILLIC CAPITAL LETTER KOPPA
+048A; C; 048B; # CYRILLIC CAPITAL LETTER SHORT I WITH TAIL
+048C; C; 048D; # CYRILLIC CAPITAL LETTER SEMISOFT SIGN
+048E; C; 048F; # CYRILLIC CAPITAL LETTER ER WITH TICK
+0490; C; 0491; # CYRILLIC CAPITAL LETTER GHE WITH UPTURN
+0492; C; 0493; # CYRILLIC CAPITAL LETTER GHE WITH STROKE
+0494; C; 0495; # CYRILLIC CAPITAL LETTER GHE WITH MIDDLE HOOK
+0496; C; 0497; # CYRILLIC CAPITAL LETTER ZHE WITH DESCENDER
+0498; C; 0499; # CYRILLIC CAPITAL LETTER ZE WITH DESCENDER
+049A; C; 049B; # CYRILLIC CAPITAL LETTER KA WITH DESCENDER
+049C; C; 049D; # CYRILLIC CAPITAL LETTER KA WITH VERTICAL STROKE
+049E; C; 049F; # CYRILLIC CAPITAL LETTER KA WITH STROKE
+04A0; C; 04A1; # CYRILLIC CAPITAL LETTER BASHKIR KA
+04A2; C; 04A3; # CYRILLIC CAPITAL LETTER EN WITH DESCENDER
+04A4; C; 04A5; # CYRILLIC CAPITAL LIGATURE EN GHE
+04A6; C; 04A7; # CYRILLIC CAPITAL LETTER PE WITH MIDDLE HOOK
+04A8; C; 04A9; # CYRILLIC CAPITAL LETTER ABKHASIAN HA
+04AA; C; 04AB; # CYRILLIC CAPITAL LETTER ES WITH DESCENDER
+04AC; C; 04AD; # CYRILLIC CAPITAL LETTER TE WITH DESCENDER
+04AE; C; 04AF; # CYRILLIC CAPITAL LETTER STRAIGHT U
+04B0; C; 04B1; # CYRILLIC CAPITAL LETTER STRAIGHT U WITH STROKE
+04B2; C; 04B3; # CYRILLIC CAPITAL LETTER HA WITH DESCENDER
+04B4; C; 04B5; # CYRILLIC CAPITAL LIGATURE TE TSE
+04B6; C; 04B7; # CYRILLIC CAPITAL LETTER CHE WITH DESCENDER
+04B8; C; 04B9; # CYRILLIC CAPITAL LETTER CHE WITH VERTICAL STROKE
+04BA; C; 04BB; # CYRILLIC CAPITAL LETTER SHHA
+04BC; C; 04BD; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE
+04BE; C; 04BF; # CYRILLIC CAPITAL LETTER ABKHASIAN CHE WITH DESCENDER
+04C1; C; 04C2; # CYRILLIC CAPITAL LETTER ZHE WITH BREVE
+04C3; C; 04C4; # CYRILLIC CAPITAL LETTER KA WITH HOOK
+04C5; C; 04C6; # CYRILLIC CAPITAL LETTER EL WITH TAIL
+04C7; C; 04C8; # CYRILLIC CAPITAL LETTER EN WITH HOOK
+04C9; C; 04CA; # CYRILLIC CAPITAL LETTER EN WITH TAIL
+04CB; C; 04CC; # CYRILLIC CAPITAL LETTER KHAKASSIAN CHE
+04CD; C; 04CE; # CYRILLIC CAPITAL LETTER EM WITH TAIL
+04D0; C; 04D1; # CYRILLIC CAPITAL LETTER A WITH BREVE
+04D2; C; 04D3; # CYRILLIC CAPITAL LETTER A WITH DIAERESIS
+04D4; C; 04D5; # CYRILLIC CAPITAL LIGATURE A IE
+04D6; C; 04D7; # CYRILLIC CAPITAL LETTER IE WITH BREVE
+04D8; C; 04D9; # CYRILLIC CAPITAL LETTER SCHWA
+04DA; C; 04DB; # CYRILLIC CAPITAL LETTER SCHWA WITH DIAERESIS
+04DC; C; 04DD; # CYRILLIC CAPITAL LETTER ZHE WITH DIAERESIS
+04DE; C; 04DF; # CYRILLIC CAPITAL LETTER ZE WITH DIAERESIS
+04E0; C; 04E1; # CYRILLIC CAPITAL LETTER ABKHASIAN DZE
+04E2; C; 04E3; # CYRILLIC CAPITAL LETTER I WITH MACRON
+04E4; C; 04E5; # CYRILLIC CAPITAL LETTER I WITH DIAERESIS
+04E6; C; 04E7; # CYRILLIC CAPITAL LETTER O WITH DIAERESIS
+04E8; C; 04E9; # CYRILLIC CAPITAL LETTER BARRED O
+04EA; C; 04EB; # CYRILLIC CAPITAL LETTER BARRED O WITH DIAERESIS
+04EC; C; 04ED; # CYRILLIC CAPITAL LETTER E WITH DIAERESIS
+04EE; C; 04EF; # CYRILLIC CAPITAL LETTER U WITH MACRON
+04F0; C; 04F1; # CYRILLIC CAPITAL LETTER U WITH DIAERESIS
+04F2; C; 04F3; # CYRILLIC CAPITAL LETTER U WITH DOUBLE ACUTE
+04F4; C; 04F5; # CYRILLIC CAPITAL LETTER CHE WITH DIAERESIS
+04F8; C; 04F9; # CYRILLIC CAPITAL LETTER YERU WITH DIAERESIS
+0500; C; 0501; # CYRILLIC CAPITAL LETTER KOMI DE
+0502; C; 0503; # CYRILLIC CAPITAL LETTER KOMI DJE
+0504; C; 0505; # CYRILLIC CAPITAL LETTER KOMI ZJE
+0506; C; 0507; # CYRILLIC CAPITAL LETTER KOMI DZJE
+0508; C; 0509; # CYRILLIC CAPITAL LETTER KOMI LJE
+050A; C; 050B; # CYRILLIC CAPITAL LETTER KOMI NJE
+050C; C; 050D; # CYRILLIC CAPITAL LETTER KOMI SJE
+050E; C; 050F; # CYRILLIC CAPITAL LETTER KOMI TJE
+0531; C; 0561; # ARMENIAN CAPITAL LETTER AYB
+0532; C; 0562; # ARMENIAN CAPITAL LETTER BEN
+0533; C; 0563; # ARMENIAN CAPITAL LETTER GIM
+0534; C; 0564; # ARMENIAN CAPITAL LETTER DA
+0535; C; 0565; # ARMENIAN CAPITAL LETTER ECH
+0536; C; 0566; # ARMENIAN CAPITAL LETTER ZA
+0537; C; 0567; # ARMENIAN CAPITAL LETTER EH
+0538; C; 0568; # ARMENIAN CAPITAL LETTER ET
+0539; C; 0569; # ARMENIAN CAPITAL LETTER TO
+053A; C; 056A; # ARMENIAN CAPITAL LETTER ZHE
+053B; C; 056B; # ARMENIAN CAPITAL LETTER INI
+053C; C; 056C; # ARMENIAN CAPITAL LETTER LIWN
+053D; C; 056D; # ARMENIAN CAPITAL LETTER XEH
+053E; C; 056E; # ARMENIAN CAPITAL LETTER CA
+053F; C; 056F; # ARMENIAN CAPITAL LETTER KEN
+0540; C; 0570; # ARMENIAN CAPITAL LETTER HO
+0541; C; 0571; # ARMENIAN CAPITAL LETTER JA
+0542; C; 0572; # ARMENIAN CAPITAL LETTER GHAD
+0543; C; 0573; # ARMENIAN CAPITAL LETTER CHEH
+0544; C; 0574; # ARMENIAN CAPITAL LETTER MEN
+0545; C; 0575; # ARMENIAN CAPITAL LETTER YI
+0546; C; 0576; # ARMENIAN CAPITAL LETTER NOW
+0547; C; 0577; # ARMENIAN CAPITAL LETTER SHA
+0548; C; 0578; # ARMENIAN CAPITAL LETTER VO
+0549; C; 0579; # ARMENIAN CAPITAL LETTER CHA
+054A; C; 057A; # ARMENIAN CAPITAL LETTER PEH
+054B; C; 057B; # ARMENIAN CAPITAL LETTER JHEH
+054C; C; 057C; # ARMENIAN CAPITAL LETTER RA
+054D; C; 057D; # ARMENIAN CAPITAL LETTER SEH
+054E; C; 057E; # ARMENIAN CAPITAL LETTER VEW
+054F; C; 057F; # ARMENIAN CAPITAL LETTER TIWN
+0550; C; 0580; # ARMENIAN CAPITAL LETTER REH
+0551; C; 0581; # ARMENIAN CAPITAL LETTER CO
+0552; C; 0582; # ARMENIAN CAPITAL LETTER YIWN
+0553; C; 0583; # ARMENIAN CAPITAL LETTER PIWR
+0554; C; 0584; # ARMENIAN CAPITAL LETTER KEH
+0555; C; 0585; # ARMENIAN CAPITAL LETTER OH
+0556; C; 0586; # ARMENIAN CAPITAL LETTER FEH
+0587; F; 0565 0582; # ARMENIAN SMALL LIGATURE ECH YIWN
+1E00; C; 1E01; # LATIN CAPITAL LETTER A WITH RING BELOW
+1E02; C; 1E03; # LATIN CAPITAL LETTER B WITH DOT ABOVE
+1E04; C; 1E05; # LATIN CAPITAL LETTER B WITH DOT BELOW
+1E06; C; 1E07; # LATIN CAPITAL LETTER B WITH LINE BELOW
+1E08; C; 1E09; # LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE
+1E0A; C; 1E0B; # LATIN CAPITAL LETTER D WITH DOT ABOVE
+1E0C; C; 1E0D; # LATIN CAPITAL LETTER D WITH DOT BELOW
+1E0E; C; 1E0F; # LATIN CAPITAL LETTER D WITH LINE BELOW
+1E10; C; 1E11; # LATIN CAPITAL LETTER D WITH CEDILLA
+1E12; C; 1E13; # LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW
+1E14; C; 1E15; # LATIN CAPITAL LETTER E WITH MACRON AND GRAVE
+1E16; C; 1E17; # LATIN CAPITAL LETTER E WITH MACRON AND ACUTE
+1E18; C; 1E19; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW
+1E1A; C; 1E1B; # LATIN CAPITAL LETTER E WITH TILDE BELOW
+1E1C; C; 1E1D; # LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE
+1E1E; C; 1E1F; # LATIN CAPITAL LETTER F WITH DOT ABOVE
+1E20; C; 1E21; # LATIN CAPITAL LETTER G WITH MACRON
+1E22; C; 1E23; # LATIN CAPITAL LETTER H WITH DOT ABOVE
+1E24; C; 1E25; # LATIN CAPITAL LETTER H WITH DOT BELOW
+1E26; C; 1E27; # LATIN CAPITAL LETTER H WITH DIAERESIS
+1E28; C; 1E29; # LATIN CAPITAL LETTER H WITH CEDILLA
+1E2A; C; 1E2B; # LATIN CAPITAL LETTER H WITH BREVE BELOW
+1E2C; C; 1E2D; # LATIN CAPITAL LETTER I WITH TILDE BELOW
+1E2E; C; 1E2F; # LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE
+1E30; C; 1E31; # LATIN CAPITAL LETTER K WITH ACUTE
+1E32; C; 1E33; # LATIN CAPITAL LETTER K WITH DOT BELOW
+1E34; C; 1E35; # LATIN CAPITAL LETTER K WITH LINE BELOW
+1E36; C; 1E37; # LATIN CAPITAL LETTER L WITH DOT BELOW
+1E38; C; 1E39; # LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON
+1E3A; C; 1E3B; # LATIN CAPITAL LETTER L WITH LINE BELOW
+1E3C; C; 1E3D; # LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW
+1E3E; C; 1E3F; # LATIN CAPITAL LETTER M WITH ACUTE
+1E40; C; 1E41; # LATIN CAPITAL LETTER M WITH DOT ABOVE
+1E42; C; 1E43; # LATIN CAPITAL LETTER M WITH DOT BELOW
+1E44; C; 1E45; # LATIN CAPITAL LETTER N WITH DOT ABOVE
+1E46; C; 1E47; # LATIN CAPITAL LETTER N WITH DOT BELOW
+1E48; C; 1E49; # LATIN CAPITAL LETTER N WITH LINE BELOW
+1E4A; C; 1E4B; # LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW
+1E4C; C; 1E4D; # LATIN CAPITAL LETTER O WITH TILDE AND ACUTE
+1E4E; C; 1E4F; # LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS
+1E50; C; 1E51; # LATIN CAPITAL LETTER O WITH MACRON AND GRAVE
+1E52; C; 1E53; # LATIN CAPITAL LETTER O WITH MACRON AND ACUTE
+1E54; C; 1E55; # LATIN CAPITAL LETTER P WITH ACUTE
+1E56; C; 1E57; # LATIN CAPITAL LETTER P WITH DOT ABOVE
+1E58; C; 1E59; # LATIN CAPITAL LETTER R WITH DOT ABOVE
+1E5A; C; 1E5B; # LATIN CAPITAL LETTER R WITH DOT BELOW
+1E5C; C; 1E5D; # LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON
+1E5E; C; 1E5F; # LATIN CAPITAL LETTER R WITH LINE BELOW
+1E60; C; 1E61; # LATIN CAPITAL LETTER S WITH DOT ABOVE
+1E62; C; 1E63; # LATIN CAPITAL LETTER S WITH DOT BELOW
+1E64; C; 1E65; # LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE
+1E66; C; 1E67; # LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE
+1E68; C; 1E69; # LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE
+1E6A; C; 1E6B; # LATIN CAPITAL LETTER T WITH DOT ABOVE
+1E6C; C; 1E6D; # LATIN CAPITAL LETTER T WITH DOT BELOW
+1E6E; C; 1E6F; # LATIN CAPITAL LETTER T WITH LINE BELOW
+1E70; C; 1E71; # LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW
+1E72; C; 1E73; # LATIN CAPITAL LETTER U WITH DIAERESIS BELOW
+1E74; C; 1E75; # LATIN CAPITAL LETTER U WITH TILDE BELOW
+1E76; C; 1E77; # LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW
+1E78; C; 1E79; # LATIN CAPITAL LETTER U WITH TILDE AND ACUTE
+1E7A; C; 1E7B; # LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS
+1E7C; C; 1E7D; # LATIN CAPITAL LETTER V WITH TILDE
+1E7E; C; 1E7F; # LATIN CAPITAL LETTER V WITH DOT BELOW
+1E80; C; 1E81; # LATIN CAPITAL LETTER W WITH GRAVE
+1E82; C; 1E83; # LATIN CAPITAL LETTER W WITH ACUTE
+1E84; C; 1E85; # LATIN CAPITAL LETTER W WITH DIAERESIS
+1E86; C; 1E87; # LATIN CAPITAL LETTER W WITH DOT ABOVE
+1E88; C; 1E89; # LATIN CAPITAL LETTER W WITH DOT BELOW
+1E8A; C; 1E8B; # LATIN CAPITAL LETTER X WITH DOT ABOVE
+1E8C; C; 1E8D; # LATIN CAPITAL LETTER X WITH DIAERESIS
+1E8E; C; 1E8F; # LATIN CAPITAL LETTER Y WITH DOT ABOVE
+1E90; C; 1E91; # LATIN CAPITAL LETTER Z WITH CIRCUMFLEX
+1E92; C; 1E93; # LATIN CAPITAL LETTER Z WITH DOT BELOW
+1E94; C; 1E95; # LATIN CAPITAL LETTER Z WITH LINE BELOW
+1E96; F; 0068 0331; # LATIN SMALL LETTER H WITH LINE BELOW
+1E97; F; 0074 0308; # LATIN SMALL LETTER T WITH DIAERESIS
+1E98; F; 0077 030A; # LATIN SMALL LETTER W WITH RING ABOVE
+1E99; F; 0079 030A; # LATIN SMALL LETTER Y WITH RING ABOVE
+1E9A; F; 0061 02BE; # LATIN SMALL LETTER A WITH RIGHT HALF RING
+1E9B; C; 1E61; # LATIN SMALL LETTER LONG S WITH DOT ABOVE
+1EA0; C; 1EA1; # LATIN CAPITAL LETTER A WITH DOT BELOW
+1EA2; C; 1EA3; # LATIN CAPITAL LETTER A WITH HOOK ABOVE
+1EA4; C; 1EA5; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE
+1EA6; C; 1EA7; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE
+1EA8; C; 1EA9; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE
+1EAA; C; 1EAB; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE
+1EAC; C; 1EAD; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW
+1EAE; C; 1EAF; # LATIN CAPITAL LETTER A WITH BREVE AND ACUTE
+1EB0; C; 1EB1; # LATIN CAPITAL LETTER A WITH BREVE AND GRAVE
+1EB2; C; 1EB3; # LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE
+1EB4; C; 1EB5; # LATIN CAPITAL LETTER A WITH BREVE AND TILDE
+1EB6; C; 1EB7; # LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW
+1EB8; C; 1EB9; # LATIN CAPITAL LETTER E WITH DOT BELOW
+1EBA; C; 1EBB; # LATIN CAPITAL LETTER E WITH HOOK ABOVE
+1EBC; C; 1EBD; # LATIN CAPITAL LETTER E WITH TILDE
+1EBE; C; 1EBF; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE
+1EC0; C; 1EC1; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE
+1EC2; C; 1EC3; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE
+1EC4; C; 1EC5; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE
+1EC6; C; 1EC7; # LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW
+1EC8; C; 1EC9; # LATIN CAPITAL LETTER I WITH HOOK ABOVE
+1ECA; C; 1ECB; # LATIN CAPITAL LETTER I WITH DOT BELOW
+1ECC; C; 1ECD; # LATIN CAPITAL LETTER O WITH DOT BELOW
+1ECE; C; 1ECF; # LATIN CAPITAL LETTER O WITH HOOK ABOVE
+1ED0; C; 1ED1; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE
+1ED2; C; 1ED3; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE
+1ED4; C; 1ED5; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE
+1ED6; C; 1ED7; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE
+1ED8; C; 1ED9; # LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW
+1EDA; C; 1EDB; # LATIN CAPITAL LETTER O WITH HORN AND ACUTE
+1EDC; C; 1EDD; # LATIN CAPITAL LETTER O WITH HORN AND GRAVE
+1EDE; C; 1EDF; # LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE
+1EE0; C; 1EE1; # LATIN CAPITAL LETTER O WITH HORN AND TILDE
+1EE2; C; 1EE3; # LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW
+1EE4; C; 1EE5; # LATIN CAPITAL LETTER U WITH DOT BELOW
+1EE6; C; 1EE7; # LATIN CAPITAL LETTER U WITH HOOK ABOVE
+1EE8; C; 1EE9; # LATIN CAPITAL LETTER U WITH HORN AND ACUTE
+1EEA; C; 1EEB; # LATIN CAPITAL LETTER U WITH HORN AND GRAVE
+1EEC; C; 1EED; # LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE
+1EEE; C; 1EEF; # LATIN CAPITAL LETTER U WITH HORN AND TILDE
+1EF0; C; 1EF1; # LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW
+1EF2; C; 1EF3; # LATIN CAPITAL LETTER Y WITH GRAVE
+1EF4; C; 1EF5; # LATIN CAPITAL LETTER Y WITH DOT BELOW
+1EF6; C; 1EF7; # LATIN CAPITAL LETTER Y WITH HOOK ABOVE
+1EF8; C; 1EF9; # LATIN CAPITAL LETTER Y WITH TILDE
+1F08; C; 1F00; # GREEK CAPITAL LETTER ALPHA WITH PSILI
+1F09; C; 1F01; # GREEK CAPITAL LETTER ALPHA WITH DASIA
+1F0A; C; 1F02; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA
+1F0B; C; 1F03; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA
+1F0C; C; 1F04; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA
+1F0D; C; 1F05; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA
+1F0E; C; 1F06; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI
+1F0F; C; 1F07; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI
+1F18; C; 1F10; # GREEK CAPITAL LETTER EPSILON WITH PSILI
+1F19; C; 1F11; # GREEK CAPITAL LETTER EPSILON WITH DASIA
+1F1A; C; 1F12; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND VARIA
+1F1B; C; 1F13; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND VARIA
+1F1C; C; 1F14; # GREEK CAPITAL LETTER EPSILON WITH PSILI AND OXIA
+1F1D; C; 1F15; # GREEK CAPITAL LETTER EPSILON WITH DASIA AND OXIA
+1F28; C; 1F20; # GREEK CAPITAL LETTER ETA WITH PSILI
+1F29; C; 1F21; # GREEK CAPITAL LETTER ETA WITH DASIA
+1F2A; C; 1F22; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA
+1F2B; C; 1F23; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA
+1F2C; C; 1F24; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA
+1F2D; C; 1F25; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA
+1F2E; C; 1F26; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI
+1F2F; C; 1F27; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI
+1F38; C; 1F30; # GREEK CAPITAL LETTER IOTA WITH PSILI
+1F39; C; 1F31; # GREEK CAPITAL LETTER IOTA WITH DASIA
+1F3A; C; 1F32; # GREEK CAPITAL LETTER IOTA WITH PSILI AND VARIA
+1F3B; C; 1F33; # GREEK CAPITAL LETTER IOTA WITH DASIA AND VARIA
+1F3C; C; 1F34; # GREEK CAPITAL LETTER IOTA WITH PSILI AND OXIA
+1F3D; C; 1F35; # GREEK CAPITAL LETTER IOTA WITH DASIA AND OXIA
+1F3E; C; 1F36; # GREEK CAPITAL LETTER IOTA WITH PSILI AND PERISPOMENI
+1F3F; C; 1F37; # GREEK CAPITAL LETTER IOTA WITH DASIA AND PERISPOMENI
+1F48; C; 1F40; # GREEK CAPITAL LETTER OMICRON WITH PSILI
+1F49; C; 1F41; # GREEK CAPITAL LETTER OMICRON WITH DASIA
+1F4A; C; 1F42; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND VARIA
+1F4B; C; 1F43; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND VARIA
+1F4C; C; 1F44; # GREEK CAPITAL LETTER OMICRON WITH PSILI AND OXIA
+1F4D; C; 1F45; # GREEK CAPITAL LETTER OMICRON WITH DASIA AND OXIA
+1F50; F; 03C5 0313; # GREEK SMALL LETTER UPSILON WITH PSILI
+1F52; F; 03C5 0313 0300; # GREEK SMALL LETTER UPSILON WITH PSILI AND VARIA
+1F54; F; 03C5 0313 0301; # GREEK SMALL LETTER UPSILON WITH PSILI AND OXIA
+1F56; F; 03C5 0313 0342; # GREEK SMALL LETTER UPSILON WITH PSILI AND 
PERISPOMENI
+1F59; C; 1F51; # GREEK CAPITAL LETTER UPSILON WITH DASIA
+1F5B; C; 1F53; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND VARIA
+1F5D; C; 1F55; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND OXIA
+1F5F; C; 1F57; # GREEK CAPITAL LETTER UPSILON WITH DASIA AND PERISPOMENI
+1F68; C; 1F60; # GREEK CAPITAL LETTER OMEGA WITH PSILI
+1F69; C; 1F61; # GREEK CAPITAL LETTER OMEGA WITH DASIA
+1F6A; C; 1F62; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA
+1F6B; C; 1F63; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA
+1F6C; C; 1F64; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA
+1F6D; C; 1F65; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA
+1F6E; C; 1F66; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI
+1F6F; C; 1F67; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI
+1F80; F; 1F00 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND YPOGEGRAMMENI
+1F81; F; 1F01 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND YPOGEGRAMMENI
+1F82; F; 1F02 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND VARIA AND 
YPOGEGRAMMENI
+1F83; F; 1F03 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND VARIA AND 
YPOGEGRAMMENI
+1F84; F; 1F04 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND OXIA AND 
YPOGEGRAMMENI
+1F85; F; 1F05 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND OXIA AND 
YPOGEGRAMMENI
+1F86; F; 1F06 03B9; # GREEK SMALL LETTER ALPHA WITH PSILI AND PERISPOMENI AND 
YPOGEGRAMMENI
+1F87; F; 1F07 03B9; # GREEK SMALL LETTER ALPHA WITH DASIA AND PERISPOMENI AND 
YPOGEGRAMMENI
+1F88; F; 1F00 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F88; S; 1F80; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PROSGEGRAMMENI
+1F89; F; 1F01 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F89; S; 1F81; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PROSGEGRAMMENI
+1F8A; F; 1F02 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND 
PROSGEGRAMMENI
+1F8A; S; 1F82; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND VARIA AND 
PROSGEGRAMMENI
+1F8B; F; 1F03 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND 
PROSGEGRAMMENI
+1F8B; S; 1F83; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND VARIA AND 
PROSGEGRAMMENI
+1F8C; F; 1F04 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND 
PROSGEGRAMMENI
+1F8C; S; 1F84; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND OXIA AND 
PROSGEGRAMMENI
+1F8D; F; 1F05 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND 
PROSGEGRAMMENI
+1F8D; S; 1F85; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND OXIA AND 
PROSGEGRAMMENI
+1F8E; F; 1F06 03B9; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI 
AND PROSGEGRAMMENI
+1F8E; S; 1F86; # GREEK CAPITAL LETTER ALPHA WITH PSILI AND PERISPOMENI AND 
PROSGEGRAMMENI
+1F8F; F; 1F07 03B9; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI 
AND PROSGEGRAMMENI
+1F8F; S; 1F87; # GREEK CAPITAL LETTER ALPHA WITH DASIA AND PERISPOMENI AND 
PROSGEGRAMMENI
+1F90; F; 1F20 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND YPOGEGRAMMENI
+1F91; F; 1F21 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND YPOGEGRAMMENI
+1F92; F; 1F22 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND VARIA AND 
YPOGEGRAMMENI
+1F93; F; 1F23 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND VARIA AND 
YPOGEGRAMMENI
+1F94; F; 1F24 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND OXIA AND 
YPOGEGRAMMENI
+1F95; F; 1F25 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND OXIA AND 
YPOGEGRAMMENI
+1F96; F; 1F26 03B9; # GREEK SMALL LETTER ETA WITH PSILI AND PERISPOMENI AND 
YPOGEGRAMMENI
+1F97; F; 1F27 03B9; # GREEK SMALL LETTER ETA WITH DASIA AND PERISPOMENI AND 
YPOGEGRAMMENI
+1F98; F; 1F20 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F98; S; 1F90; # GREEK CAPITAL LETTER ETA WITH PSILI AND PROSGEGRAMMENI
+1F99; F; 1F21 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F99; S; 1F91; # GREEK CAPITAL LETTER ETA WITH DASIA AND PROSGEGRAMMENI
+1F9A; F; 1F22 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND 
PROSGEGRAMMENI
+1F9A; S; 1F92; # GREEK CAPITAL LETTER ETA WITH PSILI AND VARIA AND 
PROSGEGRAMMENI
+1F9B; F; 1F23 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND 
PROSGEGRAMMENI
+1F9B; S; 1F93; # GREEK CAPITAL LETTER ETA WITH DASIA AND VARIA AND 
PROSGEGRAMMENI
+1F9C; F; 1F24 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND 
PROSGEGRAMMENI
+1F9C; S; 1F94; # GREEK CAPITAL LETTER ETA WITH PSILI AND OXIA AND 
PROSGEGRAMMENI
+1F9D; F; 1F25 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND 
PROSGEGRAMMENI
+1F9D; S; 1F95; # GREEK CAPITAL LETTER ETA WITH DASIA AND OXIA AND 
PROSGEGRAMMENI
+1F9E; F; 1F26 03B9; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND 
PROSGEGRAMMENI
+1F9E; S; 1F96; # GREEK CAPITAL LETTER ETA WITH PSILI AND PERISPOMENI AND 
PROSGEGRAMMENI
+1F9F; F; 1F27 03B9; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND 
PROSGEGRAMMENI
+1F9F; S; 1F97; # GREEK CAPITAL LETTER ETA WITH DASIA AND PERISPOMENI AND 
PROSGEGRAMMENI
+1FA0; F; 1F60 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND YPOGEGRAMMENI
+1FA1; F; 1F61 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND YPOGEGRAMMENI
+1FA2; F; 1F62 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND VARIA AND 
YPOGEGRAMMENI
+1FA3; F; 1F63 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND VARIA AND 
YPOGEGRAMMENI
+1FA4; F; 1F64 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND OXIA AND 
YPOGEGRAMMENI
+1FA5; F; 1F65 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND OXIA AND 
YPOGEGRAMMENI
+1FA6; F; 1F66 03B9; # GREEK SMALL LETTER OMEGA WITH PSILI AND PERISPOMENI AND 
YPOGEGRAMMENI
+1FA7; F; 1F67 03B9; # GREEK SMALL LETTER OMEGA WITH DASIA AND PERISPOMENI AND 
YPOGEGRAMMENI
+1FA8; F; 1F60 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA8; S; 1FA0; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PROSGEGRAMMENI
+1FA9; F; 1F61 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FA9; S; 1FA1; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PROSGEGRAMMENI
+1FAA; F; 1F62 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND 
PROSGEGRAMMENI
+1FAA; S; 1FA2; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND VARIA AND 
PROSGEGRAMMENI
+1FAB; F; 1F63 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND 
PROSGEGRAMMENI
+1FAB; S; 1FA3; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND VARIA AND 
PROSGEGRAMMENI
+1FAC; F; 1F64 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND 
PROSGEGRAMMENI
+1FAC; S; 1FA4; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND OXIA AND 
PROSGEGRAMMENI
+1FAD; F; 1F65 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND 
PROSGEGRAMMENI
+1FAD; S; 1FA5; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND OXIA AND 
PROSGEGRAMMENI
+1FAE; F; 1F66 03B9; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI 
AND PROSGEGRAMMENI
+1FAE; S; 1FA6; # GREEK CAPITAL LETTER OMEGA WITH PSILI AND PERISPOMENI AND 
PROSGEGRAMMENI
+1FAF; F; 1F67 03B9; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI 
AND PROSGEGRAMMENI
+1FAF; S; 1FA7; # GREEK CAPITAL LETTER OMEGA WITH DASIA AND PERISPOMENI AND 
PROSGEGRAMMENI
+1FB2; F; 1F70 03B9; # GREEK SMALL LETTER ALPHA WITH VARIA AND YPOGEGRAMMENI
+1FB3; F; 03B1 03B9; # GREEK SMALL LETTER ALPHA WITH YPOGEGRAMMENI
+1FB4; F; 03AC 03B9; # GREEK SMALL LETTER ALPHA WITH OXIA AND YPOGEGRAMMENI
+1FB6; F; 03B1 0342; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI
+1FB7; F; 03B1 0342 03B9; # GREEK SMALL LETTER ALPHA WITH PERISPOMENI AND 
YPOGEGRAMMENI
+1FB8; C; 1FB0; # GREEK CAPITAL LETTER ALPHA WITH VRACHY
+1FB9; C; 1FB1; # GREEK CAPITAL LETTER ALPHA WITH MACRON
+1FBA; C; 1F70; # GREEK CAPITAL LETTER ALPHA WITH VARIA
+1FBB; C; 1F71; # GREEK CAPITAL LETTER ALPHA WITH OXIA
+1FBC; F; 03B1 03B9; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBC; S; 1FB3; # GREEK CAPITAL LETTER ALPHA WITH PROSGEGRAMMENI
+1FBE; C; 03B9; # GREEK PROSGEGRAMMENI
+1FC2; F; 1F74 03B9; # GREEK SMALL LETTER ETA WITH VARIA AND YPOGEGRAMMENI
+1FC3; F; 03B7 03B9; # GREEK SMALL LETTER ETA WITH YPOGEGRAMMENI
+1FC4; F; 03AE 03B9; # GREEK SMALL LETTER ETA WITH OXIA AND YPOGEGRAMMENI
+1FC6; F; 03B7 0342; # GREEK SMALL LETTER ETA WITH PERISPOMENI
+1FC7; F; 03B7 0342 03B9; # GREEK SMALL LETTER ETA WITH PERISPOMENI AND 
YPOGEGRAMMENI
+1FC8; C; 1F72; # GREEK CAPITAL LETTER EPSILON WITH VARIA
+1FC9; C; 1F73; # GREEK CAPITAL LETTER EPSILON WITH OXIA
+1FCA; C; 1F74; # GREEK CAPITAL LETTER ETA WITH VARIA
+1FCB; C; 1F75; # GREEK CAPITAL LETTER ETA WITH OXIA
+1FCC; F; 03B7 03B9; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FCC; S; 1FC3; # GREEK CAPITAL LETTER ETA WITH PROSGEGRAMMENI
+1FD2; F; 03B9 0308 0300; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND VARIA
+1FD3; F; 03B9 0308 0301; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND OXIA
+1FD6; F; 03B9 0342; # GREEK SMALL LETTER IOTA WITH PERISPOMENI
+1FD7; F; 03B9 0308 0342; # GREEK SMALL LETTER IOTA WITH DIALYTIKA AND 
PERISPOMENI
+1FD8; C; 1FD0; # GREEK CAPITAL LETTER IOTA WITH VRACHY
+1FD9; C; 1FD1; # GREEK CAPITAL LETTER IOTA WITH MACRON
+1FDA; C; 1F76; # GREEK CAPITAL LETTER IOTA WITH VARIA
+1FDB; C; 1F77; # GREEK CAPITAL LETTER IOTA WITH OXIA
+1FE2; F; 03C5 0308 0300; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND VARIA
+1FE3; F; 03C5 0308 0301; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND OXIA
+1FE4; F; 03C1 0313; # GREEK SMALL LETTER RHO WITH PSILI
+1FE6; F; 03C5 0342; # GREEK SMALL LETTER UPSILON WITH PERISPOMENI
+1FE7; F; 03C5 0308 0342; # GREEK SMALL LETTER UPSILON WITH DIALYTIKA AND 
PERISPOMENI
+1FE8; C; 1FE0; # GREEK CAPITAL LETTER UPSILON WITH VRACHY
+1FE9; C; 1FE1; # GREEK CAPITAL LETTER UPSILON WITH MACRON
+1FEA; C; 1F7A; # GREEK CAPITAL LETTER UPSILON WITH VARIA
+1FEB; C; 1F7B; # GREEK CAPITAL LETTER UPSILON WITH OXIA
+1FEC; C; 1FE5; # GREEK CAPITAL LETTER RHO WITH DASIA
+1FF2; F; 1F7C 03B9; # GREEK SMALL LETTER OMEGA WITH VARIA AND YPOGEGRAMMENI
+1FF3; F; 03C9 03B9; # GREEK SMALL LETTER OMEGA WITH YPOGEGRAMMENI
+1FF4; F; 03CE 03B9; # GREEK SMALL LETTER OMEGA WITH OXIA AND YPOGEGRAMMENI
+1FF6; F; 03C9 0342; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI
+1FF7; F; 03C9 0342 03B9; # GREEK SMALL LETTER OMEGA WITH PERISPOMENI AND 
YPOGEGRAMMENI
+1FF8; C; 1F78; # GREEK CAPITAL LETTER OMICRON WITH VARIA
+1FF9; C; 1F79; # GREEK CAPITAL LETTER OMICRON WITH OXIA
+1FFA; C; 1F7C; # GREEK CAPITAL LETTER OMEGA WITH VARIA
+1FFB; C; 1F7D; # GREEK CAPITAL LETTER OMEGA WITH OXIA
+1FFC; F; 03C9 03B9; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+1FFC; S; 1FF3; # GREEK CAPITAL LETTER OMEGA WITH PROSGEGRAMMENI
+2126; C; 03C9; # OHM SIGN
+212A; C; 006B; # KELVIN SIGN
+212B; C; 00E5; # ANGSTROM SIGN
+2160; C; 2170; # ROMAN NUMERAL ONE
+2161; C; 2171; # ROMAN NUMERAL TWO
+2162; C; 2172; # ROMAN NUMERAL THREE
+2163; C; 2173; # ROMAN NUMERAL FOUR
+2164; C; 2174; # ROMAN NUMERAL FIVE
+2165; C; 2175; # ROMAN NUMERAL SIX
+2166; C; 2176; # ROMAN NUMERAL SEVEN
+2167; C; 2177; # ROMAN NUMERAL EIGHT
+2168; C; 2178; # ROMAN NUMERAL NINE
+2169; C; 2179; # ROMAN NUMERAL TEN
+216A; C; 217A; # ROMAN NUMERAL ELEVEN
+216B; C; 217B; # ROMAN NUMERAL TWELVE
+216C; C; 217C; # ROMAN NUMERAL FIFTY
+216D; C; 217D; # ROMAN NUMERAL ONE HUNDRED
+216E; C; 217E; # ROMAN NUMERAL FIVE HUNDRED
+216F; C; 217F; # ROMAN NUMERAL ONE THOUSAND
+24B6; C; 24D0; # CIRCLED LATIN CAPITAL LETTER A
+24B7; C; 24D1; # CIRCLED LATIN CAPITAL LETTER B
+24B8; C; 24D2; # CIRCLED LATIN CAPITAL LETTER C
+24B9; C; 24D3; # CIRCLED LATIN CAPITAL LETTER D
+24BA; C; 24D4; # CIRCLED LATIN CAPITAL LETTER E
+24BB; C; 24D5; # CIRCLED LATIN CAPITAL LETTER F
+24BC; C; 24D6; # CIRCLED LATIN CAPITAL LETTER G
+24BD; C; 24D7; # CIRCLED LATIN CAPITAL LETTER H
+24BE; C; 24D8; # CIRCLED LATIN CAPITAL LETTER I
+24BF; C; 24D9; # CIRCLED LATIN CAPITAL LETTER J
+24C0; C; 24DA; # CIRCLED LATIN CAPITAL LETTER K
+24C1; C; 24DB; # CIRCLED LATIN CAPITAL LETTER L
+24C2; C; 24DC; # CIRCLED LATIN CAPITAL LETTER M
+24C3; C; 24DD; # CIRCLED LATIN CAPITAL LETTER N
+24C4; C; 24DE; # CIRCLED LATIN CAPITAL LETTER O
+24C5; C; 24DF; # CIRCLED LATIN CAPITAL LETTER P
+24C6; C; 24E0; # CIRCLED LATIN CAPITAL LETTER Q
+24C7; C; 24E1; # CIRCLED LATIN CAPITAL LETTER R
+24C8; C; 24E2; # CIRCLED LATIN CAPITAL LETTER S
+24C9; C; 24E3; # CIRCLED LATIN CAPITAL LETTER T
+24CA; C; 24E4; # CIRCLED LATIN CAPITAL LETTER U
+24CB; C; 24E5; # CIRCLED LATIN CAPITAL LETTER V
+24CC; C; 24E6; # CIRCLED LATIN CAPITAL LETTER W
+24CD; C; 24E7; # CIRCLED LATIN CAPITAL LETTER X
+24CE; C; 24E8; # CIRCLED LATIN CAPITAL LETTER Y
+24CF; C; 24E9; # CIRCLED LATIN CAPITAL LETTER Z
+FB00; F; 0066 0066; # LATIN SMALL LIGATURE FF
+FB01; F; 0066 0069; # LATIN SMALL LIGATURE FI
+FB02; F; 0066 006C; # LATIN SMALL LIGATURE FL
+FB03; F; 0066 0066 0069; # LATIN SMALL LIGATURE FFI
+FB04; F; 0066 0066 006C; # LATIN SMALL LIGATURE FFL
+FB05; F; 0073 0074; # LATIN SMALL LIGATURE LONG S T
+FB06; F; 0073 0074; # LATIN SMALL LIGATURE ST
+FB13; F; 0574 0576; # ARMENIAN SMALL LIGATURE MEN NOW
+FB14; F; 0574 0565; # ARMENIAN SMALL LIGATURE MEN ECH
+FB15; F; 0574 056B; # ARMENIAN SMALL LIGATURE MEN INI
+FB16; F; 057E 0576; # ARMENIAN SMALL LIGATURE VEW NOW
+FB17; F; 0574 056D; # ARMENIAN SMALL LIGATURE MEN XEH
+FF21; C; FF41; # FULLWIDTH LATIN CAPITAL LETTER A
+FF22; C; FF42; # FULLWIDTH LATIN CAPITAL LETTER B
+FF23; C; FF43; # FULLWIDTH LATIN CAPITAL LETTER C
+FF24; C; FF44; # FULLWIDTH LATIN CAPITAL LETTER D
+FF25; C; FF45; # FULLWIDTH LATIN CAPITAL LETTER E
+FF26; C; FF46; # FULLWIDTH LATIN CAPITAL LETTER F
+FF27; C; FF47; # FULLWIDTH LATIN CAPITAL LETTER G
+FF28; C; FF48; # FULLWIDTH LATIN CAPITAL LETTER H
+FF29; C; FF49; # FULLWIDTH LATIN CAPITAL LETTER I
+FF2A; C; FF4A; # FULLWIDTH LATIN CAPITAL LETTER J
+FF2B; C; FF4B; # FULLWIDTH LATIN CAPITAL LETTER K
+FF2C; C; FF4C; # FULLWIDTH LATIN CAPITAL LETTER L
+FF2D; C; FF4D; # FULLWIDTH LATIN CAPITAL LETTER M
+FF2E; C; FF4E; # FULLWIDTH LATIN CAPITAL LETTER N
+FF2F; C; FF4F; # FULLWIDTH LATIN CAPITAL LETTER O
+FF30; C; FF50; # FULLWIDTH LATIN CAPITAL LETTER P
+FF31; C; FF51; # FULLWIDTH LATIN CAPITAL LETTER Q
+FF32; C; FF52; # FULLWIDTH LATIN CAPITAL LETTER R
+FF33; C; FF53; # FULLWIDTH LATIN CAPITAL LETTER S
+FF34; C; FF54; # FULLWIDTH LATIN CAPITAL LETTER T
+FF35; C; FF55; # FULLWIDTH LATIN CAPITAL LETTER U
+FF36; C; FF56; # FULLWIDTH LATIN CAPITAL LETTER V
+FF37; C; FF57; # FULLWIDTH LATIN CAPITAL LETTER W
+FF38; C; FF58; # FULLWIDTH LATIN CAPITAL LETTER X
+FF39; C; FF59; # FULLWIDTH LATIN CAPITAL LETTER Y
+FF3A; C; FF5A; # FULLWIDTH LATIN CAPITAL LETTER Z
+10400; C; 10428; # DESERET CAPITAL LETTER LONG I
+10401; C; 10429; # DESERET CAPITAL LETTER LONG E
+10402; C; 1042A; # DESERET CAPITAL LETTER LONG A
+10403; C; 1042B; # DESERET CAPITAL LETTER LONG AH
+10404; C; 1042C; # DESERET CAPITAL LETTER LONG O
+10405; C; 1042D; # DESERET CAPITAL LETTER LONG OO
+10406; C; 1042E; # DESERET CAPITAL LETTER SHORT I
+10407; C; 1042F; # DESERET CAPITAL LETTER SHORT E
+10408; C; 10430; # DESERET CAPITAL LETTER SHORT A
+10409; C; 10431; # DESERET CAPITAL LETTER SHORT AH
+1040A; C; 10432; # DESERET CAPITAL LETTER SHORT O
+1040B; C; 10433; # DESERET CAPITAL LETTER SHORT OO
+1040C; C; 10434; # DESERET CAPITAL LETTER AY
+1040D; C; 10435; # DESERET CAPITAL LETTER OW
+1040E; C; 10436; # DESERET CAPITAL LETTER WU
+1040F; C; 10437; # DESERET CAPITAL LETTER YEE
+10410; C; 10438; # DESERET CAPITAL LETTER H
+10411; C; 10439; # DESERET CAPITAL LETTER PEE
+10412; C; 1043A; # DESERET CAPITAL LETTER BEE
+10413; C; 1043B; # DESERET CAPITAL LETTER TEE
+10414; C; 1043C; # DESERET CAPITAL LETTER DEE
+10415; C; 1043D; # DESERET CAPITAL LETTER CHEE
+10416; C; 1043E; # DESERET CAPITAL LETTER JEE
+10417; C; 1043F; # DESERET CAPITAL LETTER KAY
+10418; C; 10440; # DESERET CAPITAL LETTER GAY
+10419; C; 10441; # DESERET CAPITAL LETTER EF
+1041A; C; 10442; # DESERET CAPITAL LETTER VEE
+1041B; C; 10443; # DESERET CAPITAL LETTER ETH
+1041C; C; 10444; # DESERET CAPITAL LETTER THEE
+1041D; C; 10445; # DESERET CAPITAL LETTER ES
+1041E; C; 10446; # DESERET CAPITAL LETTER ZEE
+1041F; C; 10447; # DESERET CAPITAL LETTER ESH
+10420; C; 10448; # DESERET CAPITAL LETTER ZHEE
+10421; C; 10449; # DESERET CAPITAL LETTER ER
+10422; C; 1044A; # DESERET CAPITAL LETTER EL
+10423; C; 1044B; # DESERET CAPITAL LETTER EM
+10424; C; 1044C; # DESERET CAPITAL LETTER EN
+10425; C; 1044D; # DESERET CAPITAL LETTER ENG
diff --git a/rpython/rlib/unicodedata/CaseFolding-5.2.0.txt 
b/rpython/rlib/unicodedata/CaseFolding-5.2.0.txt
new file mode 100644
--- /dev/null
+++ b/rpython/rlib/unicodedata/CaseFolding-5.2.0.txt
@@ -0,0 +1,1202 @@
+# CaseFolding-5.2.0.txt
+# Date: 2009-05-28, 23:02:34 GMT [MD]
+#
+# Unicode Character Database
+# Copyright (c) 1991-2009 Unicode, Inc.
+# For terms of use, see http://www.unicode.org/terms_of_use.html
+# For documentation, see http://www.unicode.org/reports/tr44/
+#
+# Case Folding Properties
+#
+# This file is a supplement to the UnicodeData file.
+# It provides a case folding mapping generated from the Unicode Character 
Database.
+# If all characters are mapped according to the full mapping below, then
+# case differences (according to UnicodeData.txt and SpecialCasing.txt)
+# are eliminated.
+#
+# The data supports both implementations that require simple case foldings
+# (where string lengths don't change), and implementations that allow full 
case folding
+# (where string lengths may grow). Note that where they can be supported, the
+# full case foldings are superior: for example, they allow "MASSE" and 
"Ma&#223;e" to match.
+#
+# All code points not listed in this file map to themselves.
+#
+# NOTE: case folding does not preserve normalization formats!
+#
+# For information on case folding, including how to have case folding 
+# preserve normalization formats, see Section 3.13 Default Case Algorithms in
+# The Unicode Standard, Version 5.0.
+#
+# 
================================================================================
+# Format
+# 
================================================================================
+# The entries in this file are in the following machine-readable format:
+#
+# <code>; <status>; <mapping>; # <name>
+#
+# The status field is:
+# C: common case folding, common mappings shared by both simple and full 
mappings.
+# F: full case folding, mappings that cause strings to grow in length. 
Multiple characters are separated by spaces.
+# S: simple case folding, mappings to single characters where different from F.
+# T: special case for uppercase I and dotted uppercase I
+#    - For non-Turkic languages, this mapping is normally not used.
+#    - For Turkic languages (tr, az), this mapping can be used instead of the 
normal mapping for these characters.
+#      Note that the Turkic mappings do not maintain canonical equivalence 
without additional processing.
+#      See the discussions of case mapping in the Unicode Standard for more 
information.
+#
+# Usage:
+#  A. To do a simple case folding, use the mappings with status C + S.
+#  B. To do a full case folding, use the mappings with status C + F.
+#
+#    The mappings with status T can be used or omitted depending on the 
desired case-folding
+#    behavior. (The default option is to exclude them.)
+#
+# =================================================================
+# @missing 0000..10FFFF; <codepoint>
+0041; C; 0061; # LATIN CAPITAL LETTER A
+0042; C; 0062; # LATIN CAPITAL LETTER B
+0043; C; 0063; # LATIN CAPITAL LETTER C
+0044; C; 0064; # LATIN CAPITAL LETTER D
+0045; C; 0065; # LATIN CAPITAL LETTER E
+0046; C; 0066; # LATIN CAPITAL LETTER F
+0047; C; 0067; # LATIN CAPITAL LETTER G
+0048; C; 0068; # LATIN CAPITAL LETTER H
+0049; C; 0069; # LATIN CAPITAL LETTER I
+0049; T; 0131; # LATIN CAPITAL LETTER I
+004A; C; 006A; # LATIN CAPITAL LETTER J
+004B; C; 006B; # LATIN CAPITAL LETTER K
+004C; C; 006C; # LATIN CAPITAL LETTER L
+004D; C; 006D; # LATIN CAPITAL LETTER M
+004E; C; 006E; # LATIN CAPITAL LETTER N
+004F; C; 006F; # LATIN CAPITAL LETTER O
+0050; C; 0070; # LATIN CAPITAL LETTER P
+0051; C; 0071; # LATIN CAPITAL LETTER Q
+0052; C; 0072; # LATIN CAPITAL LETTER R
+0053; C; 0073; # LATIN CAPITAL LETTER S
+0054; C; 0074; # LATIN CAPITAL LETTER T
+0055; C; 0075; # LATIN CAPITAL LETTER U
+0056; C; 0076; # LATIN CAPITAL LETTER V
+0057; C; 0077; # LATIN CAPITAL LETTER W
+0058; C; 0078; # LATIN CAPITAL LETTER X
+0059; C; 0079; # LATIN CAPITAL LETTER Y
+005A; C; 007A; # LATIN CAPITAL LETTER Z
+00B5; C; 03BC; # MICRO SIGN
+00C0; C; 00E0; # LATIN CAPITAL LETTER A WITH GRAVE
+00C1; C; 00E1; # LATIN CAPITAL LETTER A WITH ACUTE
+00C2; C; 00E2; # LATIN CAPITAL LETTER A WITH CIRCUMFLEX
_______________________________________________
pypy-commit mailing list
[email protected]
https://mail.python.org/mailman/listinfo/pypy-commit

Reply via email to