On 05/10/2012 06:24 PM, James Cloos wrote:
>>>>>> "JH" == Jeremy Huddleston <jerem...@apple.com> writes:
> 
> JH> This commit introduced a 'make check' failure due by duplicating
> JH> existing entries:
> 
> I ran compose-check.pl while reviewing it, but on the .pre files rather
> than on the compiled files.....  ☹
> 
> I'll have to work on a patch to that script to enable its use pre-compile.
> 
> Duplicate removal patch on the way....
> 
> -JimC

The attached check.py can give you some more feedback on the quality of
Compose.pre. Please, try it and see what you can incorporate in the
default check process.

Was someone able to review and apply the patches I sent on May 5th
to resolve the outstanding issues before the next release?

Regards,

Pander
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Name		check-compose.py
Description	Check compose sequences
Author		Pander <pan...@users.sourceforge.net>
License		MIT License

0.1 2012-01-06	Pander <pan...@users.sourceforge.net>
Initial release

0.2 2012-03-19	Pander <pan...@users.sourceforge.net>
Added downloading
"""

import binascii
import sys
from urllib import urlretrieve
from os.path import isfile, getsize

# When True, the reports guarded by "if not silent" below are not printed.
silent = False

def isUnicodeUpper(s):
	"""Tell whether s is 'U' followed by four or five uppercase hex digits.

	The digits 0-9 and the letters A-F are accepted; lowercase a-f are not.
	Returns True on a match and False otherwise.
	"""
	allowed = tuple('0123456789ABCDEF')
	# Reject on length first so that s[0] is never read from an empty value.
	if len(s) not in (5, 6) or s[0] != 'U':
		return False
	for position in range(1, len(s)):
		if s[position] not in allowed:
			return False
	return True

def isUnicodeLower(s):
	"""Tell whether s is 'U' followed by four or five lowercase hex digits.

	The leading 'U' must still be uppercase. The digits 0-9 and the letters
	a-f are accepted; uppercase A-F are not. Returns True on a match and
	False otherwise.
	"""
	allowed = tuple('0123456789abcdef')
	# Reject on length first so that s[0] is never read from an empty value.
	if len(s) not in (5, 6) or s[0] != 'U':
		return False
	for position in range(1, len(s)):
		if s[position] not in allowed:
			return False
	return True

def download_hook(blocks_transferred, block_size, file_size):
	""" A download hook to provide some feedback when downloading

	Has the three-argument shape of a urlretrieve report hook.

	blocks_transferred -- a header is written when this is 0
	block_size -- not used
	file_size -- included in the header when greater than 0

	Every call writes one '#' to stdout and flushes it. Nothing is written
	when the module-level flag silent is set; previously the '#' marks were
	written even then.
	"""
	if silent:
		return
	if blocks_transferred == 0:
		# Written without a trailing newline so the '#' marks follow on the
		# same line.
		if file_size > 0:
			sys.stdout.write('INFO: Downloading %s bytes:' % file_size)
		else:
			sys.stdout.write('INFO: Downloading:')
	sys.stdout.write('#')
	sys.stdout.flush()

def download_file(url):
	""" Downloads a file provided a URL. Returns the filename.

	The local filename is the last '/'-separated component of url, relative
	to the current directory. An existing file of that name with a size
	greater than zero is used as a cache and nothing is downloaded.

	Exits the process with status -1 when the download raises an exception.
	"""
	localfilename = url.split('/')[-1]
	if isfile(localfilename) and getsize(localfilename) > 0:
		if not silent:
			print('INFO: Using cached file for %s' % url)
		return localfilename

	if not silent:
		print('INFO: Downloading %s ...' % url)
	try:
		urlretrieve(url, localfilename, download_hook)
	except IOError as err:
		# Bind the exception instead of unpacking it into two names: an
		# IOError whose argument list is not exactly two items would make
		# the unpacking itself fail inside this handler.
		if err.errno is not None:
			print('I/O error(%s): %s' % (err.errno, err.strerror))
		else:
			print('I/O error: %s' % (err,))
		sys.exit(-1)
	except Exception:
		# Not a bare except, so SystemExit and KeyboardInterrupt pass through.
		print('Unexpected error: %s' % (sys.exc_info(),))
		sys.exit(-1)
	# Terminates the progress line; suppressed together with it when silent.
	if not silent:
		print(' done.')
	return localfilename

# Load Unicode information
# unicode_info maps the first ';'-separated field of each UnicodeData.txt line
# to the second field. It is looked up further down with the uppercased hex
# digits of a code and compared against the name in the Compose.pre comment.
unicode_info = {}
unicode_file = None
unicode_filename = download_file('http://www.unicode.org/Public/UNIDATA/UnicodeData.txt')
try:
	unicode_file = open(unicode_filename, 'r')
except IOError as err:
	print('I/O error(%s): %s' % (err.errno, err.strerror))
	sys.exit(-1)
except Exception:
	print('Unexpected error: %s' % (sys.exc_info(),))
	sys.exit(-1)
try:
	for line in unicode_file:
		data = line.split(';')
		# Skip lines without a second field (e.g. an empty line) instead of
		# failing with an IndexError.
		if len(data) < 2:
			continue
		unicode_info[data[0]] = data[1]
finally:
	# The handle is not needed after this point.
	unicode_file.close()

# Load compose sequences
def _report(message, line):
	""" Prints a finding and the line it refers to, unless silent is set. """
	if not silent:
		print(message)
		# line[:-1] drops the last character of the line, normally its newline.
		print('  %s' % line[:-1])

# Maps each compose sequence (the text before the colon) to a tuple of
# (char, code, name) as parsed from the rest of the line.
compose_sequences = {}
# Distinct non-empty codes, chars and names seen so far; only counted at the end.
codes = []
chars = []
names = []
compose_file = None
compose_filename = download_file('http://cgit.freedesktop.org/xorg/lib/libX11/plain/nls/en_US.UTF-8/Compose.pre')
try:
	compose_file = open(compose_filename, 'r')
except IOError as err:
	print('I/O error(%s): %s' % (err.errno, err.strerror))
	sys.exit(-1)
except Exception:
	print('Unexpected error: %s' % (sys.exc_info()[0],))
	sys.exit(-1)
try:
	compose_lines = compose_file.readlines()
finally:
	# The handle is not needed once all lines are in memory.
	compose_file.close()

# Number of the line being checked, counting every line of the file from 1.
lines = 0
for line in compose_lines:
	lines = lines + 1
	# Only lines that start with a key name in angle brackets are checked.
	if line[0] != '<':
		continue
	# Normalise two whitespace variants so that the splitting below sees a
	# single layout. Lines mentioning <U17f keep their colon spacing and are
	# handled as a special case further down.
	line = line.replace('"\t\t# ', '"    # ')
	if '<U17f' not in line:
		line = line.replace(':   "', ': "')

	data = line.split(': "')
	seq = data[0].strip()
	if len(data) < 2:
		# No ': "' separator was found on this line.
		if '<U17f' not in seq:
			_report('ERROR lines %s: unknown malformation' % lines, line)
			# Nothing can be parsed from this line. Skip it rather than
			# continue with the values left over from the previous line.
			continue
		seq = seq.split(' :')[0].strip()
		_report('WARNING line %s: missing second double quote and comment with name' % lines, line)
		char = ''
		code = ''
		name = ''
	else:
		# The closing double quote is followed by either a space or a tab.
		charcodename = data[1].split('" ')
		if len(charcodename) == 1:
			charcodename = data[1].split('"\t')
		if len(charcodename) == 1:
			# Neither separator is present, so there is no code or name part.
			_report('ERROR lines %s: unknown malformation' % lines, line)
			continue
		char = charcodename[0].strip()
		charname = charcodename[1].strip().split(' # ')
		if len(charname) == 1:
			# Only a comment follows the character. A missing code (alias or
			# Unicode code point) is deliberately not reported.
			code = ''
			name = charname[0].replace('# ', '').strip()
		else:
			code = charname[0].strip()
			name = charname[1].strip()
	if False:#TODO for generating documentation
		print('seq: %s' % seq)
		print('  char: %s' % char)
		print('  code: %s' % code)
		print('  name: %s' % name)

	# Compare with the sequences registered so far. At most one finding is
	# reported per line, hence the "at least" in the messages.
	if seq in compose_sequences:
		# Checked first so that an exact duplicate is always reported as
		# such, independent of the iteration order of the dictionary.
		_report('ERROR line %s: at least duplicate compose sequence' % lines, line)
	else:
		for se, (ch, co, na) in compose_sequences.items():
			if se.startswith(seq):
				_report('ERROR line %s: compose sequence is blocking at least %s : "%s" %s # %s' % (lines, se, ch, co, na), line)
				break
			elif seq.startswith(se):
				_report('ERROR line %s: compose sequence is at least blocked by %s : "%s" %s # %s' % (lines, se, ch, co, na), line)
				break
			elif code != '' and code == co and name != na:
				_report('WARNING line %s: non-identical character names for same code for %s : "%s" %s # %s' % (lines, se, ch, co, na), line)
				break
			elif code != '' and code != co and name == na:
				_report('WARNING line %s: non-identical codes for same character name for %s : "%s" %s # %s' % (lines, se, ch, co, na), line)
				break
			#TODO etc.

	# For codes of the form U plus hex digits, compare the name in the
	# comment with the name recorded in UnicodeData.txt.
	if code != '' and (isUnicodeUpper(code) or isUnicodeLower(code)):
		CODE = code[1:].upper()
		if CODE in unicode_info:
			info = unicode_info[CODE]
			if info != name:
				_report('WARNING line %s: incorrect comment with name, should be %s from UnicodeData.txt' % (lines, info), line)
		else:
			_report('WARNING line %s: unknown Unicode code point %s according to UnicodeData.txt' % (lines, code), line)

	compose_sequences[seq] = (char, code, name)
	if code != '' and code not in codes:
		codes.append(code)
	if char != '' and char not in chars:
		chars.append(char)
	if name != '' and name not in names:
		names.append(name)

if not silent:
	print('INFO: checked %s compose sequences' % len(compose_sequences))
	print('INFO: resulting in %s different chars' % len(chars))
	print('INFO: related to %s different codes' % len(codes))
	print('INFO: with %s different names in comment' % len(names))
_______________________________________________
xorg-devel@lists.x.org: X.Org development
Archives: http://lists.x.org/archives/xorg-devel
Info: http://lists.x.org/mailman/listinfo/xorg-devel

Reply via email to