Since I have connectivity problems now and then, I wrote a mini-app
using PyQt to give me the basic features of Ishida's UniView (which
also seems to have had some server problems recently). It may be
useful to others too, so I'm posting it here. It's under the GPL
since I use PyQt under the GPL.

Since it depends on PyQt, it is probably immediately usable by Linux
users, especially those whose distros have PyQt pre-installed or
installable with a single command such as apt-get or yum. On other
platforms, you'll need to install Python and PyQt yourself, as
appropriate.

BTW I use Python 3, so a few tweaks may be needed to get it working
with Python 2. Since it's GPL, please feel free to make derivatives.

I hope the name "UniView" is not copyrighted or anything. I certainly
don't intend to infringe...

-- 
Shriramana Sharma ஶ்ரீரமணஶர்மா श्रीरमणशर्मा
#! /usr/bin/env python3

# UniView -- lists the Unicode characters in an input string
# (C) Shriramana Sharma, 2014
# Licence: GPLv3

from PyQt4.QtCore import *
from PyQt4.QtGui import *
import re
import unicodedata

def code ( char, smpLen = 6 ) :
	'''Returns the lowercase hexadecimal codepoint of the single character char, zero-padded.
	
	Characters up to U+FFFF (the BMP) are padded to 4 digits; characters above U+FFFF
	are padded to smpLen digits. The result carries no "0x", "U+" or other prefix.'''
	codepoint = ord(char)
	# pick the width from the codepoint itself rather than from the number of digits,
	# so that 4-digit BMP codepoints such as U+1234 are not padded out to smpLen digits
	width = smpLen if codepoint > 0xFFFF else 4
	return format ( codepoint, "x" ).zfill(width)

def joinSurrogates(match) :
	'''Returns the single character encoded by the surrogate pair held in groups 1 and 2 of match'''
	# offset of each surrogate from the start of its range
	high = ord ( match.group(1) ) - 0xD800
	low = ord ( match.group(2) ) - 0xDC00
	# the high surrogate supplies the upper 10 bits, the low surrogate the lower 10 bits
	return chr ( 0x10000 + ( high << 10 ) + low )

def fixSurrogatePresence(s) :
	'''Returns s with every high+low surrogate pair replaced by the one character the pair encodes'''
	# ideas from:
	# http://www.unicode.org/faq/utf_bom.html#utf16-4
	# http://stackoverflow.com/a/6928284/1503120
	surrogatePair = re.compile ( '([\uD800-\uDBFF])([\uDC00-\uDFFF])' )
	return surrogatePair.sub ( joinSurrogates, s )

def setComboItem(cb,s) :
	'''Makes current every item of combo box cb whose text equals s, in order; does nothing if none matches'''
	for index in range ( cb.count() ) :
		if cb.itemText(index) != s : continue
		cb.setCurrentIndex(index)

class MainWindow(QWidget) :
	'''Main window of UniView.
	
	Holds an input text box, an Analyse button and a tab widget that shows the
	codepoints of the input either as a table (codepoint + character name) or
	as a single string whose format is set by the "String output config" box.'''
	
	def __init__(self) :
		'''Creates the child widgets, lays them out and connects their signals'''
		
		QWidget.__init__(self)
		
		self.setObjectName("mainWindow")
		self.setWindowTitle("UniView")
		
		self.inputTextBox = QPlainTextEdit()
		
		w = self.inputLabel = QLabel("&Input text")
		w.setBuddy(self.inputTextBox)
		
		self.analyseButton = QPushButton("&Analyse")
		
		w = self.tabWidget = QTabWidget()
		self.tableTab = QTableView()
		self.stringTab = QWidget()
		w.addTab ( self.tableTab, "As a &table" )
		w.addTab ( self.stringTab, "As a st&ring" )
		
		# contents of string tab
		
		w = self.outputTextBox = QPlainTextEdit()
		w.setReadOnly(True)
		
		w = self.outputLabel = QLabel("&Codepoints")
		w.setBuddy(self.outputTextBox)
		
		# the combo boxes below are looked up by name ("<key>ComboBox") when
		# connecting signals and when applying presetMap, so keep the names in sync
		
		w = self.presetComboBox = QComboBox()
		w.addItem("Simple")
		w.addItem("Python")
		w.addItem("Custom")
		
		w = self.presetLabel = QLabel("<b>&Presets</b>")
		w.setBuddy(self.presetComboBox)
		
		w = self.hexCaseComboBox = QComboBox()
		w.addItem("ABCDEF")
		w.addItem("abcdef")
		
		w = self.hexCaseLabel = QLabel("&Hex digits case")
		w.setBuddy(self.hexCaseComboBox)
		
		w = self.bmpPrefixComboBox = QComboBox()
		w.addItem("U+")
		w.addItem("\\u")
		w.setEditable(True)
		
		w = self.bmpPrefixLabel = QLabel("&BMP Prefix")
		w.setBuddy(self.bmpPrefixComboBox)
		
		w = self.smpPrefixComboBox = QComboBox()
		w.addItem("U+")
		w.addItem("\\U")
		w.setEditable(True)
		
		w = self.smpPrefixLabel = QLabel("&SMP Prefix")
		w.setBuddy(self.smpPrefixComboBox)
		
		w = self.smpCodeLengthComboBox = QComboBox()
		w.addItem("6")
		w.addItem("8")
		
		w = self.smpCodeLengthLabel = QLabel("SMP Hex &Length")
		w.setBuddy(self.smpCodeLengthComboBox)
		
		w = self.delimeterComboBox = QComboBox()
		w.addItem("(space)")
		w.addItem("(none)")
		w.addItem(",")
		w.setEditable(True)
		
		w = self.delimeterLabel = QLabel("&Delimeter")
		w.setBuddy(self.delimeterComboBox)
		
		# two label/combo pairs per row; column 2 is left empty as a spacer
		l = self.presetGrid = QGridLayout()
		l.addWidget ( self.presetLabel, 0, 0 )
		l.addWidget ( self.presetComboBox, 0, 1 )
		l.addWidget ( self.hexCaseLabel, 0, 3 )
		l.addWidget ( self.hexCaseComboBox, 0, 4 )
		l.addWidget ( self.bmpPrefixLabel, 1, 0 )
		l.addWidget ( self.bmpPrefixComboBox, 1, 1 )
		l.addWidget ( self.delimeterLabel, 1, 3 )
		l.addWidget ( self.delimeterComboBox, 1, 4 )
		l.addWidget ( self.smpPrefixLabel, 2, 0 )
		l.addWidget ( self.smpPrefixComboBox, 2, 1 )
		l.addWidget ( self.smpCodeLengthLabel, 2, 3 )
		l.addWidget ( self.smpCodeLengthComboBox, 2, 4 )
		l.setColumnMinimumWidth ( 2, 20 )
		
		w = self.presetGroupBox = QGroupBox("String output config")
		w.setLayout(l)
		
		l = self.stringTabLayout = QVBoxLayout()
		l.addWidget(self.outputLabel)
		l.addWidget(self.outputTextBox)
		l.addWidget(self.presetGroupBox)
		self.stringTab.setLayout(l)
		
		l = self.mainLayout = QVBoxLayout()
		l.addWidget(self.inputLabel)
		l.addWidget(self.inputTextBox)
		l.addWidget(self.analyseButton)
		l.addWidget(self.tabWidget)
		self.setLayout(l)
		
		QObject.connect ( self.analyseButton, SIGNAL("clicked()"), self.analyseText )
		QObject.connect ( self.presetComboBox, SIGNAL("currentIndexChanged(const QString &)"), self.presetChanged )
		# changing any setting that a preset controls switches the preset box to "Custom"
		for cbn in "bmpPrefix", "smpPrefix", "smpCodeLength", "delimeter" :
			QObject.connect ( getattr ( self, cbn + "ComboBox" ), SIGNAL("currentIndexChanged(const QString &)"),
			                  lambda: setComboItem ( self.presetComboBox, "Custom" ) )
		
	def analyseText(self) :
		'''Fills both output tabs with the codepoints of the text currently in the input box.
		
		The string tab uses the prefixes, hex case, SMP hex length and delimiter chosen
		in the config box; the table tab always shows "U+" with uppercase hex digits.'''
		
		bmpPrefix = self.bmpPrefixComboBox.currentText()
		smpPrefix = self.smpPrefixComboBox.currentText()
		smpCodeLength = int(self.smpCodeLengthComboBox.currentText())
		capitalHex = self.hexCaseComboBox.currentText() == "ABCDEF"
		
		# "(none)" and "(space)" are placeholders for values that would be invisible in the combo box
		delimeter = self.delimeterComboBox.currentText()
		if delimeter == "(none)" : delimeter = ""
		elif delimeter == "(space)" : delimeter = " "
		
		text = fixSurrogatePresence(self.inputTextBox.toPlainText())
		
		def formatted(char) :
			'''Returns prefix + hex codepoint for one character, per the current settings'''
			prefix = smpPrefix if ord(char) > 0xffff else bmpPrefix
			digits = code(char,smpCodeLength)
			return prefix + ( digits.upper() if capitalHex else digits )
		
		# a single join instead of growing the output string piece by piece
		self.outputTextBox.setPlainText ( delimeter.join ( formatted(char) for char in text ) )
		
		tableModel = QStandardItemModel ( len(text), 2 )
		tableModel.setHeaderData ( 0, Qt.Horizontal, "Codepoint" )
		tableModel.setHeaderData ( 1, Qt.Horizontal, "Character name" )
		for row, char in enumerate(text) :
			tableModel.setItem ( row, 0, QStandardItem ( "U+" + code(char).upper() ) )
			tableModel.setItem ( row, 1, QStandardItem ( unicodedata.name ( char, "UNKNOWN" ) ) )
		self.tableTab.setModel(tableModel)
		self.tableTab.horizontalHeader().setStretchLastSection(True)
	
	# preset name -> { combo box name without the "ComboBox" suffix: item text to select }
	presetMap = { "Simple": { "bmpPrefix": "U+",
	                          "smpPrefix": "U+",
	                          "delimeter": "(space)",
	                          "smpCodeLength": "6" },
	              "Python": { "bmpPrefix": "\\u",
	                          "smpPrefix": "\\U",
	                          "delimeter": "(none)",
	                          "smpCodeLength": "8" } }
	
	def presetChanged ( self, newPresetName ) :
		'''Selects the combo box items listed in presetMap for newPresetName; "Custom" changes nothing'''
		if newPresetName == "Custom" : return
		for k,v in MainWindow.presetMap[newPresetName].items() :
			setComboItem ( getattr ( self, k + "ComboBox" ), v )
		setComboItem ( self.presetComboBox, newPresetName ) # needed since above actions may reset this to Custom
	
# script start-up: create the application object, then build and show the main window
app = QApplication([])
mainWindow = MainWindow()
mainWindow.show()
# hand control to the application's exec_() loop
app.exec_()

# राम ராம രാമ ರಾಮ రామ rāma 𑀭𑀸𑀫
_______________________________________________
Unicode mailing list
Unicode@unicode.org
http://unicode.org/mailman/listinfo/unicode

Reply via email to