Package: tunnelx
Version: 20160713-3
Severity: normal
Tags: patch

Characters outside US-ASCII get corrupted when saving and loading
tunnel sketches.  The reason is that each char is simply cast to byte
when saving, and sign-extended to char when loading.  I've made a patch
which writes and reads XML numeric character references for characters
outside US-ASCII, the encoding declared in the XML declaration.  I tested
with my current survey, which contains Gaelic names, and hand-edited the
XML to ensure that reading hexadecimal representations works (we always
write decimal).  The patch is probably suitable for forwarding upstream.

Perhaps an alternative approach might be considered - open the output
file with UTF-8 encoding, and change the XML declaration to reflect
that.

Index: tunnelx-20160713/src/TNXML.java
===================================================================
--- tunnelx-20160713.orig/src/TNXML.java
+++ tunnelx-20160713/src/TNXML.java
@@ -726,7 +726,7 @@ class TNXML
 	/////////////////////////////////////////////
 	static char[] chconvCH = { (char)176, (char)246, (char)252, '<', '>', '"', '&', '\\', '\'', '\n', '\t', ' ' };
 	static char[] chconv = chconvCH;  // allow for hacks (which vary chconvleng)
-	static String[] chconvName = {"&deg;", "&ouml;", "&uuml;", "&lt;", "&gt;", "&quot;", "&amp;", "&backslash;", "&apostrophe;", "&newline;", "&tab;", "&space;" };
+	static String[] chconvName = {"deg", "ouml", "uuml", "lt", "gt", "quot", "amp", "backslash", "apostrophe", "newline", "tab", "space" };
 	static int chconvleng = chconvCH.length;  // used for hacking out the space ones (this hack needs to be killed, or replaced with a flag)
 	static int chconvlengWSP = chconvCH.length - 4;  // used for hacking out the space ones (this hack needs to be killed, or replaced with a flag)
 	/////////////////////////////////////////////
@@ -739,16 +739,23 @@ class TNXML
 			int j;
 
 			// there might be a regexp that would do this substitution directly, or use indexOf in a concatenated string of chconvCH
-			for (j = 0; j < chconvleng; j++)
+			for (j = 3; j < chconvleng; j++) // start at '<' to allow deg, ouml, and uuml to use the general substitution below
 			{
 				if ((ch == chconvCH[j]) && (bAlsoSpace || (ch != ' ')))
 				{
-					sb.append(chconvName[j]);
+					sb.append('&').append(chconvName[j]).append(';');
 					break;
 				}
 			}
-			if (j == chconvleng)
-				sb.append(ch);
+			if (j == chconvleng) {
+				// not found in table
+				if (' ' <= ch && ch <= 127)
+					// printable ASCII
+					sb.append(ch);
+				else
+					// general Unicode character
+					sb.append("&#").append((int)ch).append(";");
+			}
 		}
 	}
 
@@ -771,31 +778,34 @@ class TNXML
 			char ch = s.charAt(i);
 			if (ch == '&')
 			{
-				int j;
-				for (j = 0; j < chconvleng; j++)
-				{
-					if (s.regionMatches(i, chconvName[j], 0, chconvName[j].length()))
-					{
-						sb.append(chconvCH[j]);
-						i += chconvName[j].length() - 1;
-						//if (j < 2)
-						//	System.out.println(chconv[j] + " -- " + (int)chconv[j].toCharArray()[0]);
-						break;
-					}
-				}
-				if (j == chconvleng)
-				{
-					if (s.regionMatches(i, "&space;", 0, 7))  // back-compatible
-					{
-						sb.append(" ");
-						i += 6;
-					}
+				int refc = s.indexOf(';', i);
+				if (refc < 0)
+					TN.emitError("Missing reference close at " + s.substring(i, Math.max(i+15, s.length())));
+
+				if (s.charAt(++i) == '#')  {
+					// A malformed numeric character reference will result in NumberFormatException
+					if (s.charAt(++i) == 'x')
+						// hexadecimal
+						sb.append((char)Integer.parseInt(s.substring(++i, refc), 16));
 					else
+						// decimal
+						sb.append((char)Integer.parseInt(s.substring(i, refc), 10));
+				} else {
+					String name = s.substring(i, refc);
+					int j;
+					for (j = 0; j < chconvleng; j++)
 					{
-						System.out.println(s.substring(i));
-						TN.emitError("unable to resolve & from pos " + i + " in string:" + s);
+						if (name.equals(chconvName[j]))
+						{
+							sb.append(chconvCH[j]);
+							break;
+						}
 					}
+					if (j == chconvleng)
+						TN.emitError("unable to resolve entity " + name);
 				}
+				// advance to the reference-close character (loop increment will skip it)
+				i = refc;
 			}
 			else
 				sb.append(ch);

-- System Information:
Debian Release: 9.0
  APT prefers testing
  APT policy: (900, 'testing'), (900, 'stable'), (400, 'unstable')
Architecture: amd64 (x86_64)
Foreign Architectures: i386, armel

Kernel: Linux 3.16.7-ckt2-balti (SMP w/8 CPU cores; PREEMPT)
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8)
Shell: /bin/sh linked to /bin/dash
Init: sysvinit (via /sbin/init)

Versions of packages tunnelx depends on:
ii  default-jre [java8-runtime]    2:1.8-58
ii  gcj-4.8-jre [java5-runtime]    4.8.5-4
ii  jarwrapper                     0.59
ii  openjdk-8-jre [java8-runtime]  8u111-b14-3

tunnelx recommends no packages.

tunnelx suggests no packages.

-- no debconf information

Reply via email to