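This patch replaces vowels that carry a macron (mostly ō and ū) with the
corresponding plain vowels in maps created with code page ms932 (extended
Shift-JIS, used for Japanese text), because those characters do not exist
in that code page.
Rather than butcher TableTransliterator.java, I created
SparseTransliterator.java for the few character conversions I needed, and
changed CodeFunctions.java to use it for ms932.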
Index: src/uk/me/parabola/imgfmt/app/labelenc/CodeFunctions.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/labelenc/CodeFunctions.java (revision 2053)
+++ src/uk/me/parabola/imgfmt/app/labelenc/CodeFunctions.java (working copy)
@@ -102,10 +102,8 @@
funcs.setEncodingType(ENCODING_FORMAT10);
funcs.setEncoder(new AnyCharsetEncoder("ms932"));
funcs.setDecoder(new AnyCharsetDecoder("ms932"));
- Transliterator transliterator = new
NullTransliterator();
- funcs.setTransliterator(transliterator);
+ funcs.setTransliterator(new
SparseTransliterator("nomacron"));
funcs.setCodepage(932);
-
} else {
funcs.setEncodingType(ENCODING_FORMAT9);
funcs.setDecoder(new AnyCharsetDecoder(charset));
Index: src/uk/me/parabola/imgfmt/app/labelenc/SparseTransliterator.java
===================================================================
--- src/uk/me/parabola/imgfmt/app/labelenc/SparseTransliterator.java (revision 0)
+++ src/uk/me/parabola/imgfmt/app/labelenc/SparseTransliterator.java (revision 0)
@@ -0,0 +1,72 @@
+/*
+ * Copyright (C) 2011 mkgmap authors
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 3 or
+ * version 2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+package uk.me.parabola.imgfmt.app.labelenc;
+
+import java.util.Locale;
+
+import uk.me.parabola.log.Logger;
+
+/**
+ * A sparse character-based transliterator that leaves most characters
unchanged.
+ *
+ */
+public class SparseTransliterator implements Transliterator {
+ private static final Logger log =
Logger.getLogger(SparseTransliterator.class);
+
+ private final boolean useNoMacron;
+ private boolean forceUppercase;
+
+ public SparseTransliterator(String targetCharset) {
+ useNoMacron = (targetCharset.equals("nomacron")) ? true : false;
+ }
+
+ /**
+ * Convert a string into a string that uses only acceptable characters.
+ *
+ * @param s The original string. It can use any unicode character. Can
be null in which case null will
+ * be returned.
+ * @return A string that uses only acceptable characters.
+ */
+ public String transliterate(String s) {
+ if (s == null)
+ return null;
+
+ StringBuilder sb = new StringBuilder(s.length() + 5);
+ for (char c : s.toCharArray()) {
+ if (useNoMacron) {
+ // Only macrons are modified, all other chars
(including non-ascii) are left unchanged
+ if (c == 0x101) // Unicode Character 'LATIN
SMALL LETTER A WITH MACRON' (U+0101)
+ c = 'a';
+ if (c == 0x113) // Unicode Character 'LATIN
SMALL LETTER E WITH MACRON' (U+0113)
+ c = 'e';
+ if (c == 0x12b) // Unicode Character 'LATIN
SMALL LETTER I WITH MACRON' (U+012B)
+ c = 'i';
+ if (c == 0x14d) // Unicode Character 'LATIN
SMALL LETTER O WITH MACRON' (U+014D)
+ c = 'o';
+ if (c == 0x16b) // Unicode Character 'LATIN
SMALL LETTER U WITH MACRON' (U+016B)
+ c = 'u';
+ }
+ sb.append(c);
+ }
+
+ String text = sb.toString();
+ if (forceUppercase)
+ text = text.toUpperCase(Locale.ENGLISH);
+ return text;
+ }
+
+ public void forceUppercase(boolean uc) {
+ forceUppercase = uc;
+ }
+}
_______________________________________________
mkgmap-dev mailing list
mkgmap-dev@lists.mkgmap.org.uk
http://www.mkgmap.org.uk/mailman/listinfo/mkgmap-dev