Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontDirFinder.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontDirFinder.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontDirFinder.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontDirFinder.java Sat Aug 30 02:26:57 2014 @@ -18,24 +18,23 @@ package org.apache.fontbox.util.autodetect; import java.io.File; -import java.io.IOException; import java.util.List; import java.util.Map; /** - * Implementers provide find method for searching native operating system for available fonts. This class is based on a - * class provided by Apache FOP. see org.apache.fop.fonts.autodetect.FontDirFinder + * Implementers provide find method for searching native operating system for available fonts. + * This class is based on a class provided by Apache FOP. + * + * See org.apache.fop.fonts.autodetect.FontDirFinder */ public interface FontDirFinder { - /** * Finds a list of font files. * * @return list of font files. - * @throws IOException In case of an I/O problem */ - List<File> find() throws IOException; + List<File> find(); /** * Provides a list of platform specific ttf name mappings. @@ -43,5 +42,4 @@ public interface FontDirFinder * @return a fontname mapping */ Map<String, String> getCommonTTFMapping(); - }
Modified: pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java (original) +++ pdfbox/trunk/fontbox/src/main/java/org/apache/fontbox/util/autodetect/FontFileFinder.java Sat Aug 30 02:26:57 2014 @@ -63,9 +63,8 @@ public class FontFileFinder * Automagically finds a list of font files on local system. * * @return List<URI> of font files - * @throws IOException thrown if an I/O exception of some sort has occurred */ - public List<URI> find() throws IOException + public List<URI> find() { if (fontDirFinder == null) { @@ -85,9 +84,8 @@ public class FontFileFinder * * @param dir directory to search * @return list<URI> of font files - * @throws IOException thrown if an I/O exception of some sort has occurred */ - public List<URI> find(String dir) throws IOException + public List<URI> find(String dir) { List<URI> results = new java.util.ArrayList<URI>(); File directory = new File(dir); @@ -154,7 +152,7 @@ public class FontFileFinder /** * Check if the given name belongs to a font file. 
* - * @param filename the given filename + * @param file the given file * @return true if the given filename has a typical font file ending */ private boolean checkFontfile(File file) Modified: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMap.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMap.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMap.java (original) +++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMap.java Sat Aug 30 02:26:57 2014 @@ -38,7 +38,7 @@ public class TestCMap extends TestCase bs[0] = (byte)200; CMap cMap = new CMap(); - cMap.addMapping(bs, "a"); - assertTrue("a".equals(cMap.lookup(bs, 0, 1))); + cMap.addCharMapping(bs, "a"); + assertTrue("a".equals(cMap.toUnicode(200))); } } Modified: pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java (original) +++ pdfbox/trunk/fontbox/src/test/java/org/apache/fontbox/cmap/TestCMapParser.java Sat Aug 30 02:26:57 2014 @@ -19,7 +19,6 @@ package org.apache.fontbox.cmap; import java.io.File; import java.io.FileInputStream; import java.io.IOException; -import java.util.Arrays; import junit.framework.TestCase; @@ -27,57 +26,59 @@ import junit.framework.TestCase; * This will test the CMapParser implementation. * */ -public class TestCMapParser extends TestCase +public class TestCMapParser extends TestCase { /** * Check whether the parser and the resulting mapping is working correct. 
+ * * @throws IOException If something went wrong */ public void testLookup() throws IOException { - final String resourceDir= "src/test/resources/cmap"; + final String resourceDir = "src/test/resources/cmap"; File inDir = new File(resourceDir); - + CMapParser parser = new CMapParser(); - CMap cMap = parser.parse( resourceDir, new FileInputStream(new File(inDir,"CMapTest"))); - - // code space range - assertEquals("codeSpaceRanges size", 1, cMap.getCodeSpaceRanges().size()); - final byte[] expectedStart = {0, 0}; // 00 00 - final byte[] expectedEnd = {2, -1}; // 02 FF - final byte[] actualStart = cMap.getCodeSpaceRanges().get(0).getStart(); - final byte[] actualEnd = cMap.getCodeSpaceRanges().get(0).getEnd(); - assertTrue("codeSpaceRange start", Arrays.equals(expectedStart, actualStart)); - assertTrue("codeSpaceRange end", Arrays.equals(expectedEnd, actualEnd)); - + CMap cMap = parser.parse(resourceDir, new FileInputStream(new File(inDir, "CMapTest"))); + // char mappings - byte[] bytes1 = {0,1}; - assertEquals("bytes 00 01 from bfrange <0001> <0009> <0041>", "A", cMap.lookup(bytes1, 0, 2)); + byte[] bytes1 = {0, 1}; + assertEquals("bytes 00 01 from bfrange <0001> <0009> <0041>", "A", cMap.toUnicode(toInt(bytes1))); - byte[] bytes2 = {1,00}; + byte[] bytes2 = {1, 00}; String str2 = "0"; - assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2, cMap.lookup(bytes2, 0, 2)); + assertEquals("bytes 01 00 from bfrange <0100> <0109> <0030>", str2, cMap.toUnicode(toInt(bytes2))); - byte[] bytes3 = {0,10}; + byte[] bytes3 = {0, 10}; String str3 = "*"; - assertEquals("bytes 00 0A from bfchar <000A> <002A>", str3, cMap.lookup(bytes3, 0, 2)); + assertEquals("bytes 00 0A from bfchar <000A> <002A>", str3, cMap.toUnicode(toInt(bytes3))); - byte[] bytes4 = {1,10}; + byte[] bytes4 = {1, 10}; String str4 = "+"; - assertEquals("bytes 01 0A from bfchar <010A> <002B>", str4, cMap.lookup(bytes4, 0, 2)); + assertEquals("bytes 01 0A from bfchar <010A> <002B>", str4, 
cMap.toUnicode(toInt(bytes4))); // CID mappings int cid1 = 65; - assertEquals("CID 65 from cidrange <0000> <00ff> 0 ", "A", cMap.lookupCID(cid1)); + assertEquals("CID 65 from cidrange <0000> <00ff> 0 ", 65, cMap.toCID(cid1)); int cid2 = 280; - String strCID2 = "\u0118"; - assertEquals("CID 280 from cidrange <0100> <01ff> 256", strCID2, cMap.lookupCID(cid2)); - + int strCID2 = 0x0118; + assertEquals("CID 280 from cidrange <0100> <01ff> 256", strCID2, cMap.toCID(cid2)); + int cid3 = 520; - String strCID3 = "\u0208"; - assertEquals("CID 520 from cidchar <0208> 520", strCID3, cMap.lookupCID(cid3)); + int strCID3 = 0x0208; + assertEquals("CID 520 from cidchar <0208> 520", strCID3, cMap.toCID(cid3)); } + private int toInt(byte[] data) + { + int code = 0; + for (byte b : data) + { + code <<= 8; + code |= (b + 256) % 256; + } + return code; + } } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/cos/COSName.java Sat Aug 30 02:26:57 2014 @@ -179,6 +179,7 @@ public final class COSName extends COSBa public static final COSName DUPLEX = new COSName("Duplex"); public static final COSName DV = new COSName("DV"); public static final COSName DW = new COSName("DW"); + public static final COSName DW2 = new COSName("DW2"); // E public static final COSName E = new COSName("E"); public static final COSName EARLY_CHANGE = new COSName("EarlyChange"); @@ -465,6 +466,7 @@ public final class COSName extends COSBa public static final COSName VIEWER_PREFERENCES = new COSName("ViewerPreferences"); // W public static final COSName W = new COSName("W"); + public static final COSName W2 = new COSName("W2"); 
public static final COSName WHITE_POINT = new COSName("WhitePoint"); public static final COSName WIDTH = new COSName("Width"); public static final COSName WIDTHS = new COSName("Widths"); Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/DictionaryEncoding.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/DictionaryEncoding.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/DictionaryEncoding.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/DictionaryEncoding.java Sat Aug 30 02:26:57 2014 @@ -1,4 +1,5 @@ /* +/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. @@ -16,7 +17,8 @@ */ package org.apache.pdfbox.encoding; -import java.io.IOException; +import java.util.HashMap; +import java.util.Map; import org.apache.pdfbox.cos.COSArray; import org.apache.pdfbox.cos.COSBase; @@ -27,47 +29,66 @@ import org.apache.pdfbox.cos.COSNumber; /** * This will perform the encoding from a dictionary. * - * @author <a href="mailto:b...@benlitchfield.com">Ben Litchfield</a> - * @version $Revision: 1.13 $ + * @author Ben Litchfield */ public class DictionaryEncoding extends Encoding { - private COSDictionary encoding = null; + private final COSDictionary encoding; + private final Encoding baseEncoding; + private final Map<Integer, String> differences = new HashMap<Integer, String>(); /** - * Constructor. + * Creates a new DictionaryEncoding for embedding. 
+ */ + public DictionaryEncoding(COSName baseEncoding, COSArray differences) + { + encoding = new COSDictionary(); + encoding.setItem(COSName.NAME, COSName.ENCODING); + encoding.setItem(COSName.DIFFERENCES, differences); + if (baseEncoding != COSName.STANDARD_ENCODING) + { + encoding.setItem(COSName.BASE_ENCODING, baseEncoding); + this.baseEncoding = Encoding.getInstance(baseEncoding); + } + else + { + this.baseEncoding = Encoding.getInstance(baseEncoding); + } + } + + /** + * Creates a new DictionaryEncoding from a PDF. * * @param fontEncoding The encoding dictionary. - * - * @throws IOException If there is a problem getting the base font. */ - public DictionaryEncoding( COSDictionary fontEncoding ) throws IOException + public DictionaryEncoding(COSDictionary fontEncoding, boolean isNonSymbolic, Encoding builtIn) { encoding = fontEncoding; - //first set up the base encoding - //The previious value WinAnsiEncoding() has been changed to StandardEnding - //see p 389 of the PDF 1.5 reference table 5.11 entries in a dictionary encoding - //"If this entry is absent, the Differences entry describes differences from an implicit - //base encoding. For a font program that is embedded in the PDF file, the - //implicit base encoding is the font program's built-in encoding, as described - //above and further elaborated in the sections on specific font types below. Otherwise, - //for a nonsymbolic font, it is StandardEncoding, and for a symbolic font, it - //is the font's built-in encoding." 
- - // The default base encoding is standardEncoding - Encoding baseEncoding = StandardEncoding.INSTANCE; - COSName baseEncodingName = - (COSName) encoding.getDictionaryObject(COSName.BASE_ENCODING); - if (baseEncodingName != null) { - baseEncoding = Encoding.getInstance(baseEncodingName); + if (encoding.containsKey(COSName.BASE_ENCODING)) + { + COSName name = encoding.getCOSName(COSName.BASE_ENCODING); + baseEncoding = Encoding.getInstance(name); + } + else if (isNonSymbolic) + { + // Otherwise, for a nonsymbolic font, it is StandardEncoding + baseEncoding = StandardEncoding.INSTANCE; + } + else + { + // and for a symbolic font, it is the font's built-in encoding." + baseEncoding = builtIn; + if (builtIn == null) + { + throw new IllegalArgumentException("Built-in Encoding required for symbolic font"); + } } - nameToCode.putAll( baseEncoding.nameToCode ); codeToName.putAll( baseEncoding.codeToName ); + names.addAll( baseEncoding.names ); - - //now replace with the differences. + // now replace with the differences COSArray differences = (COSArray)encoding.getDictionaryObject( COSName.DIFFERENCES ); int currentIndex = -1; for( int i=0; differences != null && i<differences.size(); i++ ) @@ -80,12 +101,30 @@ public class DictionaryEncoding extends else if( next instanceof COSName ) { COSName name = (COSName)next; - addCharacterEncoding( currentIndex++, name.getName() ); + add(currentIndex, name.getName()); + this.differences.put(currentIndex, name.getName()); + currentIndex++; } } } /** + * Returns the base encoding. + */ + public Encoding getBaseEncoding() + { + return baseEncoding; + } + + /** + * Returns the Differences array. + */ + public Map<Integer, String> getDifferences() + { + return differences; + } + + /** * Convert this standard java object to a COS object. * * @return The cos object that matches this Java object. 
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Encoding.java Sat Aug 30 02:26:57 2014 @@ -16,39 +16,30 @@ */ package org.apache.pdfbox.encoding; -import java.io.File; -import java.io.IOException; import java.util.Collections; -import java.util.Enumeration; import java.util.HashMap; +import java.util.HashSet; import java.util.Map; -import java.util.MissingResourceException; -import java.util.Properties; -import java.util.StringTokenizer; +import java.util.Set; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; import org.apache.pdfbox.cos.COSName; import org.apache.pdfbox.pdmodel.common.COSObjectable; -import org.apache.pdfbox.util.ResourceLoader; /** - * This is an interface to a text encoder. + * A PostScript encoding vector, maps character codes to glyph names. * * @author Ben Litchfield */ public abstract class Encoding implements COSObjectable { - private static final Log LOG = LogFactory.getLog(Encoding.class); - /** * This will get an encoding by name. * * @param name The name of the encoding to get. * @return The encoding that matches the name. - * @throws IOException if there is no encoding with that name. 
*/ - public static Encoding getInstance(COSName name) throws IOException { + public static Encoding getInstance(COSName name) + { if (COSName.STANDARD_ENCODING.equals(name)) { return StandardEncoding.INSTANCE; @@ -61,113 +52,14 @@ public abstract class Encoding implement { return MacRomanEncoding.INSTANCE; } - else if (COSName.PDF_DOC_ENCODING.equals(name)) - { - return PdfDocEncoding.INSTANCE; - } else { - throw new IOException("Unknown encoding for '" + name.getName() + "'"); + return null; } } - /** Identifies a non-mapped character. */ - public static final String NOTDEF = ".notdef"; - - /** - * This is a mapping from a character code to a character name. - */ protected final Map<Integer, String> codeToName = new HashMap<Integer, String>(); - - /** - * This is a mapping from a character name to a character code. - */ - protected final Map<String, Integer> nameToCode = new HashMap<String, Integer>(); - - private static final Map<String, String> NAME_TO_CHARACTER = new HashMap<String, String>(); - - private static final Map<String, String> CHARACTER_TO_NAME = new HashMap<String, String>(); - - static - { - // Loads the official glyph List based on adobes glyph list - loadGlyphProperties("org/apache/pdfbox/resources/glyphlist.properties"); - // Loads some additional glyph mappings - loadGlyphProperties("org/apache/pdfbox/resources/additional_glyphlist.properties"); - - // Load an external glyph list file that user can give as JVM property - try - { - String location = System.getProperty("glyphlist_ext"); - if (location != null) - { - File external = new File(location); - if (external.exists()) - { - loadGlyphProperties(location); - } - } - } - catch (SecurityException e) // can occur on Sytem.getProperty - { - // PDFBOX-1946 ignore and continue - } - - NAME_TO_CHARACTER.put(NOTDEF, ""); - NAME_TO_CHARACTER.put("fi", "fi"); - NAME_TO_CHARACTER.put("fl", "fl"); - NAME_TO_CHARACTER.put("ffi", "ffi"); - NAME_TO_CHARACTER.put("ff", "ff"); - NAME_TO_CHARACTER.put("pi", 
"pi"); - - for (Map.Entry<String, String> entry : NAME_TO_CHARACTER.entrySet()) - { - CHARACTER_TO_NAME.put(entry.getValue(), entry.getKey()); - } - } - - /** - * Loads a glyph list from a given location and populates the NAME_TO_CHARACTER hashmap for character lookups. - * - * @param location - The string location of the glyphlist file - */ - private static void loadGlyphProperties(String location) - { - try - { - Properties glyphProperties = ResourceLoader.loadProperties(location, false); - if (glyphProperties == null) - { - throw new MissingResourceException("Glyphlist not found: " + location, Encoding.class.getName(), - location); - } - Enumeration<?> names = glyphProperties.propertyNames(); - for (Object name : Collections.list(names)) - { - String glyphName = name.toString(); - String unicodeValue = glyphProperties.getProperty(glyphName); - StringTokenizer tokenizer = new StringTokenizer(unicodeValue, " ", false); - StringBuilder value = new StringBuilder(); - while (tokenizer.hasMoreTokens()) - { - int characterCode = Integer.parseInt(tokenizer.nextToken(), 16); - value.append((char) characterCode); - } - if (NAME_TO_CHARACTER.containsKey(glyphName)) - { - LOG.warn("duplicate value for characterName=" + glyphName + "," + value); - } - else - { - NAME_TO_CHARACTER.put(glyphName, value.toString()); - } - } - } - catch (IOException io) - { - LOG.error("error while reading the glyph property file.", io); - } - } + protected final Set<String> names = new HashSet<String>(); /** * Returns an unmodifiable view of the Code2Name mapping. @@ -180,222 +72,50 @@ public abstract class Encoding implement } /** - * Returns an unmodifiable view of the Name2Code mapping. - * - * @return the Name2Code map - */ - public Map<String, Integer> getNameToCodeMap() - { - return Collections.unmodifiableMap(nameToCode); - } - - /** * This will add a character encoding. * - * @param code The character code that matches the character. - * @param name The name of the character. 
+ * @param code character code + * @param name PostScript glyph name */ - public void addCharacterEncoding(int code, String name) + protected void add(int code, String name) { codeToName.put(code, name); - nameToCode.put(name, code); + names.add(name); } /** * Determines if the encoding has a mapping for the given name value. * - * @param name the source value for the mapping - * @return the mapped value + * @param name PostScript glyph name */ - public boolean hasCodeForName(String name) + public boolean contains(String name) { - return nameToCode.containsKey(name); + return names.contains(name); } /** * Determines if the encoding has a mapping for the given code value. * - * @param code the source value for the mapping - * @return the mapped value + * @param code character code */ - public boolean hasNameForCode(int code) + public boolean contains(int code) { return codeToName.containsKey(code); } - - /** - * This will get the character code for the name. - * - * @param name The name of the character. - * - * @return The code for the character. - * - * @throws IOException If there is no character code for the name. - */ - public int getCode(String name) throws IOException - { - Integer code = nameToCode.get(name); - if (code == null) - { - throw new IOException("No character code for character name '" + name + "'"); - } - return code; - } - - /** - * This will take a character code and get the name from the code. - * - * @param code The character code. - * - * @return The name of the character. - * - * @throws IOException If there is no name for the code. - */ - public String getName(int code) throws IOException - { - return codeToName.get(code); - } /** * This will take a character code and get the name from the code. * - * @param c The character. - * - * @return The name of the character. - * - * @throws IOException If there is no name for the character. 
+ * @param code character code + * @return PostScript glyph name */ - public String getNameForCharacter(char c) throws IOException + public String getName(int code) { - String name = CHARACTER_TO_NAME.get(Character.toString(c)); - if (name == null) - { - throw new IOException("No name for character '" + c + "'"); - } - return name; + String name = codeToName.get(code); + if (name != null) + { + return name; + } + return ".notdef"; } - - /** - * This will take a name and get the character code for that name. - * - * @param name The name. - * - * @return The name of the character. - * - */ - public static String getCharacterForName(String name) - { - if (NAME_TO_CHARACTER.containsKey(name)) - { - return NAME_TO_CHARACTER.get(name); - } - if (LOG.isDebugEnabled()) - { - LOG.debug("No character for name " + name); - } - return null; - } - - /** - * This will get the character from the code. - * - * @param code The character code. - * - * @return The printable character for the code. - * - * @throws IOException If there is not name for the character. - */ - public String getCharacter(int code) throws IOException - { - String name = getName(code); - if (name != null) - { - return getCharacter(name); - } - return null; - } - - /** - * This will get the character from the name. - * - * @param name The name of the character. - * - * @return The printable character for the code. 
- */ - public String getCharacter(String name) - { - String character = NAME_TO_CHARACTER.get(name); - if (character == null) - { - // test if we have a suffix and if so remove it - if (name.indexOf('.') > 0) - { - character = getCharacter(name.substring(0, name.indexOf('.'))); - } - // test for Unicode name - // (uniXXXX - XXXX must be a multiple of four; - // each representing a hexadecimal Unicode code point) - else if (name.startsWith("uni")) - { - int nameLength = name.length(); - StringBuilder uniStr = new StringBuilder(); - try - { - for (int chPos = 3; chPos + 4 <= nameLength; chPos += 4) - { - int characterCode = Integer.parseInt(name.substring(chPos, chPos + 4), 16); - - if (characterCode > 0xD7FF && characterCode < 0xE000) - { - LOG.warn("Unicode character name with not allowed code area: " + name); - } - else - { - uniStr.append((char) characterCode); - } - } - character = uniStr.toString(); - NAME_TO_CHARACTER.put(name, character); - } - catch (NumberFormatException nfe) - { - LOG.warn("Not a number in Unicode character name: " + name); - character = name; - } - } - // test for an alternate Unicode name representation - else if (name.startsWith("u")) - { - try - { - int characterCode = Integer.parseInt(name.substring(1), 16); - if (characterCode > 0xD7FF && characterCode < 0xE000) - { - LOG.warn("Unicode character name with not allowed code area: " + name); - } - else - { - character = String.valueOf((char) characterCode); - NAME_TO_CHARACTER.put(name, character); - } - } - catch (NumberFormatException nfe) - { - LOG.warn("Not a number in Unicode character name: " + name); - character = name; - } - } - else if (nameToCode.containsKey(name)) - { - int code = nameToCode.get(name); - character = Character.toString((char) code); - } - else - { - character = name; - } - } - return character; - } - } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacOSRomanEncoding.java URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacOSRomanEncoding.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacOSRomanEncoding.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacOSRomanEncoding.java Sat Aug 30 02:26:57 2014 @@ -38,22 +38,22 @@ public class MacOSRomanEncoding extends public MacOSRomanEncoding() { super(); - addCharacterEncoding(255, "notequal"); - addCharacterEncoding(260, "infinity"); - addCharacterEncoding(262, "lessequal"); - addCharacterEncoding(263, "greaterequal"); - addCharacterEncoding(266, "partialdiff"); - addCharacterEncoding(267, "summation"); - addCharacterEncoding(270, "product"); - addCharacterEncoding(271, "pi"); - addCharacterEncoding(272, "integral"); - addCharacterEncoding(275, "Omega"); - addCharacterEncoding(303, "radical"); - addCharacterEncoding(305, "approxequal"); - addCharacterEncoding(306, "Delta"); - addCharacterEncoding(327, "lozenge"); - addCharacterEncoding(333, "Euro"); - addCharacterEncoding(360, "apple"); + add(255, "notequal"); + add(260, "infinity"); + add(262, "lessequal"); + add(263, "greaterequal"); + add(266, "partialdiff"); + add(267, "summation"); + add(270, "product"); + add(271, "pi"); + add(272, "integral"); + add(275, "Omega"); + add(303, "radical"); + add(305, "approxequal"); + add(306, "Delta"); + add(327, "lozenge"); + add(333, "Euro"); + add(360, "apple"); } /** Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java 
(original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/MacRomanEncoding.java Sat Aug 30 02:26:57 2014 @@ -39,217 +39,215 @@ public class MacRomanEncoding extends En */ public MacRomanEncoding() { - addCharacterEncoding( 0101, "A" ); - addCharacterEncoding( 0256, "AE" ); - addCharacterEncoding( 0347, "Aacute" ); - addCharacterEncoding( 0345, "Acircumflex" ); - addCharacterEncoding( 0200, "Adieresis" ); - addCharacterEncoding( 0313, "Agrave" ); - addCharacterEncoding( 0201, "Aring" ); - addCharacterEncoding( 0314, "Atilde" ); - addCharacterEncoding( 0102, "B" ); - addCharacterEncoding( 0103, "C" ); - addCharacterEncoding( 0202, "Ccedilla" ); - addCharacterEncoding( 0104, "D" ); - addCharacterEncoding( 0105, "E" ); - addCharacterEncoding( 0203, "Eacute" ); - addCharacterEncoding( 0346, "Ecircumflex" ); - addCharacterEncoding( 0350, "Edieresis" ); - addCharacterEncoding( 0351, "Egrave" ); - addCharacterEncoding( 0106, "F" ); - addCharacterEncoding( 0107, "G" ); - addCharacterEncoding( 0110, "H" ); - addCharacterEncoding( 0111, "I" ); - addCharacterEncoding( 0352, "Iacute" ); - addCharacterEncoding( 0353, "Icircumflex" ); - addCharacterEncoding( 0354, "Idieresis" ); - addCharacterEncoding( 0355, "Igrave" ); - addCharacterEncoding( 0112, "J" ); - addCharacterEncoding( 0113, "K" ); - addCharacterEncoding( 0114, "L" ); - addCharacterEncoding( 0115, "M" ); - addCharacterEncoding( 0116, "N" ); - addCharacterEncoding( 0204, "Ntilde" ); - addCharacterEncoding( 0117, "O" ); - addCharacterEncoding( 0316, "OE" ); - addCharacterEncoding( 0356, "Oacute" ); - addCharacterEncoding( 0357, "Ocircumflex" ); - addCharacterEncoding( 0205, "Odieresis" ); - addCharacterEncoding( 0361, "Ograve" ); - addCharacterEncoding( 0257, "Oslash" ); - addCharacterEncoding( 0315, "Otilde" ); - addCharacterEncoding( 0120, "P" ); - addCharacterEncoding( 0121, "Q" ); - addCharacterEncoding( 0122, "R" ); - addCharacterEncoding( 0123, "S" ); - addCharacterEncoding( 0124, "T" ); - 
addCharacterEncoding( 0125, "U" ); - addCharacterEncoding( 0362, "Uacute" ); - addCharacterEncoding( 0363, "Ucircumflex" ); - addCharacterEncoding( 0206, "Udieresis" ); - addCharacterEncoding( 0364, "Ugrave" ); - addCharacterEncoding( 0126, "V" ); - addCharacterEncoding( 0127, "W" ); - addCharacterEncoding( 0130, "X" ); - addCharacterEncoding( 0131, "Y" ); - addCharacterEncoding( 0331, "Ydieresis" ); - addCharacterEncoding( 0132, "Z" ); - addCharacterEncoding( 0141, "a" ); - addCharacterEncoding( 0207, "aacute" ); - addCharacterEncoding( 0211, "acircumflex" ); - addCharacterEncoding( 0253, "acute" ); - addCharacterEncoding( 0212, "adieresis" ); - addCharacterEncoding( 0276, "ae" ); - addCharacterEncoding( 0210, "agrave" ); - addCharacterEncoding( 046, "ampersand" ); - addCharacterEncoding( 0214, "aring" ); - addCharacterEncoding( 0136, "asciicircum" ); - addCharacterEncoding( 0176, "asciitilde" ); - addCharacterEncoding( 052, "asterisk" ); - addCharacterEncoding( 0100, "at" ); - addCharacterEncoding( 0213, "atilde" ); - addCharacterEncoding( 0142, "b" ); - addCharacterEncoding( 0134, "backslash" ); - addCharacterEncoding( 0174, "bar" ); - addCharacterEncoding( 0173, "braceleft" ); - addCharacterEncoding( 0175, "braceright" ); - addCharacterEncoding( 0133, "bracketleft" ); - addCharacterEncoding( 0135, "bracketright" ); - addCharacterEncoding( 0371, "breve" ); - addCharacterEncoding( 0245, "bullet" ); - addCharacterEncoding( 0143, "c" ); - addCharacterEncoding( 0377, "caron" ); - addCharacterEncoding( 0215, "ccedilla" ); - addCharacterEncoding( 0374, "cedilla" ); - addCharacterEncoding( 0242, "cent" ); - addCharacterEncoding( 0366, "circumflex" ); - addCharacterEncoding( 072, "colon" ); - addCharacterEncoding( 054, "comma" ); - addCharacterEncoding( 0251, "copyright" ); - addCharacterEncoding( 0333, "currency" ); - addCharacterEncoding( 0144, "d" ); - addCharacterEncoding( 0240, "dagger" ); - addCharacterEncoding( 0340, "daggerdbl" ); - addCharacterEncoding( 0241, 
"degree" ); - addCharacterEncoding( 0254, "dieresis" ); - addCharacterEncoding( 0326, "divide" ); - addCharacterEncoding( 044, "dollar" ); - addCharacterEncoding( 0372, "dotaccent" ); - addCharacterEncoding( 0365, "dotlessi" ); - addCharacterEncoding( 0145, "e" ); - addCharacterEncoding( 0216, "eacute" ); - addCharacterEncoding( 0220, "ecircumflex" ); - addCharacterEncoding( 0221, "edieresis" ); - addCharacterEncoding( 0217, "egrave" ); - addCharacterEncoding( 070, "eight" ); - addCharacterEncoding( 0311, "ellipsis" ); - addCharacterEncoding( 0321, "emdash" ); - addCharacterEncoding( 0320, "endash" ); - addCharacterEncoding( 075, "equal" ); - addCharacterEncoding( 041, "exclam" ); - addCharacterEncoding( 0301, "exclamdown" ); - addCharacterEncoding( 0146, "f" ); - addCharacterEncoding( 0336, "fi" ); - addCharacterEncoding( 065, "five" ); - addCharacterEncoding( 0337, "fl" ); - addCharacterEncoding( 0304, "florin" ); - addCharacterEncoding( 064, "four" ); - addCharacterEncoding( 0332, "fraction" ); - addCharacterEncoding( 0147, "g" ); - addCharacterEncoding( 0247, "germandbls" ); - addCharacterEncoding( 0140, "grave" ); - addCharacterEncoding( 076, "greater" ); - addCharacterEncoding( 0307, "guillemotleft" ); - addCharacterEncoding( 0310, "guillemotright" ); - addCharacterEncoding( 0334, "guilsinglleft" ); - addCharacterEncoding( 0335, "guilsinglright" ); - addCharacterEncoding( 0150, "h" ); - addCharacterEncoding( 0375, "hungarumlaut" ); - addCharacterEncoding( 055, "hyphen" ); - addCharacterEncoding( 0151, "i" ); - addCharacterEncoding( 0222, "iacute" ); - addCharacterEncoding( 0224, "icircumflex" ); - addCharacterEncoding( 0225, "idieresis" ); - addCharacterEncoding( 0223, "igrave" ); - addCharacterEncoding( 0152, "j" ); - addCharacterEncoding( 0153, "k" ); - addCharacterEncoding( 0154, "l" ); - addCharacterEncoding( 074, "less" ); - addCharacterEncoding( 0302, "logicalnot" ); - addCharacterEncoding( 0155, "m" ); - addCharacterEncoding( 0370, "macron" ); - 
addCharacterEncoding( 0265, "mu" ); - addCharacterEncoding( 0156, "n" ); - addCharacterEncoding( 071, "nine" ); - addCharacterEncoding( 0226, "ntilde" ); - addCharacterEncoding( 043, "numbersign" ); - addCharacterEncoding( 0157, "o" ); - addCharacterEncoding( 0227, "oacute" ); - addCharacterEncoding( 0231, "ocircumflex" ); - addCharacterEncoding( 0232, "odieresis" ); - addCharacterEncoding( 0317, "oe" ); - addCharacterEncoding( 0376, "ogonek" ); - addCharacterEncoding( 0230, "ograve" ); - addCharacterEncoding( 061, "one" ); - addCharacterEncoding( 0273, "ordfeminine" ); - addCharacterEncoding( 0274, "ordmasculine" ); - addCharacterEncoding( 0277, "oslash" ); - addCharacterEncoding( 0233, "otilde" ); - addCharacterEncoding( 0160, "p" ); - addCharacterEncoding( 0246, "paragraph" ); - addCharacterEncoding( 050, "parenleft" ); - addCharacterEncoding( 051, "parenright" ); - addCharacterEncoding( 045, "percent" ); - addCharacterEncoding( 056, "period" ); - addCharacterEncoding( 0341, "periodcentered" ); - addCharacterEncoding( 0344, "perthousand" ); - addCharacterEncoding( 053, "plus" ); - addCharacterEncoding( 0261, "plusminus" ); - addCharacterEncoding( 0161, "q" ); - addCharacterEncoding( 077, "question" ); - addCharacterEncoding( 0300, "questiondown" ); - addCharacterEncoding( 042, "quotedbl" ); - addCharacterEncoding( 0343, "quotedblbase" ); - addCharacterEncoding( 0322, "quotedblleft" ); - addCharacterEncoding( 0323, "quotedblright" ); - addCharacterEncoding( 0324, "quoteleft" ); - addCharacterEncoding( 0325, "quoteright" ); - addCharacterEncoding( 0342, "quotesinglbase" ); - addCharacterEncoding( 047, "quotesingle" ); - addCharacterEncoding( 0162, "r" ); - addCharacterEncoding( 0250, "registered" ); - addCharacterEncoding( 0373, "ring" ); - addCharacterEncoding( 0163, "s" ); - addCharacterEncoding( 0244, "section" ); - addCharacterEncoding( 073, "semicolon" ); - addCharacterEncoding( 067, "seven" ); - addCharacterEncoding( 066, "six" ); - addCharacterEncoding( 
057, "slash" ); - addCharacterEncoding( 040, "space" ); - addCharacterEncoding( 0243, "sterling" ); - addCharacterEncoding( 0164, "t" ); - addCharacterEncoding( 063, "three" ); - addCharacterEncoding( 0367, "tilde" ); - addCharacterEncoding( 0252, "trademark" ); - addCharacterEncoding( 062, "two" ); - addCharacterEncoding( 0165, "u" ); - addCharacterEncoding( 0234, "uacute" ); - addCharacterEncoding( 0236, "ucircumflex" ); - addCharacterEncoding( 0237, "udieresis" ); - addCharacterEncoding( 0235, "ugrave" ); - addCharacterEncoding( 0137, "underscore" ); - addCharacterEncoding( 0166, "v" ); - addCharacterEncoding( 0167, "w" ); - addCharacterEncoding( 0170, "x" ); - addCharacterEncoding( 0171, "y" ); - addCharacterEncoding( 0330, "ydieresis" ); - addCharacterEncoding( 0264, "yen" ); - addCharacterEncoding( 0172, "z" ); - addCharacterEncoding( 060, "zero" ); + add(0101, "A"); + add(0256, "AE"); + add(0347, "Aacute"); + add(0345, "Acircumflex"); + add(0200, "Adieresis"); + add(0313, "Agrave"); + add(0201, "Aring"); + add(0314, "Atilde"); + add(0102, "B"); + add(0103, "C"); + add(0202, "Ccedilla"); + add(0104, "D"); + add(0105, "E"); + add(0203, "Eacute"); + add(0346, "Ecircumflex"); + add(0350, "Edieresis"); + add(0351, "Egrave"); + add(0106, "F"); + add(0107, "G"); + add(0110, "H"); + add(0111, "I"); + add(0352, "Iacute"); + add(0353, "Icircumflex"); + add(0354, "Idieresis"); + add(0355, "Igrave"); + add(0112, "J"); + add(0113, "K"); + add(0114, "L"); + add(0115, "M"); + add(0116, "N"); + add(0204, "Ntilde"); + add(0117, "O"); + add(0316, "OE"); + add(0356, "Oacute"); + add(0357, "Ocircumflex"); + add(0205, "Odieresis"); + add(0361, "Ograve"); + add(0257, "Oslash"); + add(0315, "Otilde"); + add(0120, "P"); + add(0121, "Q"); + add(0122, "R"); + add(0123, "S"); + add(0124, "T"); + add(0125, "U"); + add(0362, "Uacute"); + add(0363, "Ucircumflex"); + add(0206, "Udieresis"); + add(0364, "Ugrave"); + add(0126, "V"); + add(0127, "W"); + add(0130, "X"); + add(0131, "Y"); + 
add(0331, "Ydieresis"); + add(0132, "Z"); + add(0141, "a"); + add(0207, "aacute"); + add(0211, "acircumflex"); + add(0253, "acute"); + add(0212, "adieresis"); + add(0276, "ae"); + add(0210, "agrave"); + add(046, "ampersand"); + add(0214, "aring"); + add(0136, "asciicircum"); + add(0176, "asciitilde"); + add(052, "asterisk"); + add(0100, "at"); + add(0213, "atilde"); + add(0142, "b"); + add(0134, "backslash"); + add(0174, "bar"); + add(0173, "braceleft"); + add(0175, "braceright"); + add(0133, "bracketleft"); + add(0135, "bracketright"); + add(0371, "breve"); + add(0245, "bullet"); + add(0143, "c"); + add(0377, "caron"); + add(0215, "ccedilla"); + add(0374, "cedilla"); + add(0242, "cent"); + add(0366, "circumflex"); + add(072, "colon"); + add(054, "comma"); + add(0251, "copyright"); + add(0333, "currency"); + add(0144, "d"); + add(0240, "dagger"); + add(0340, "daggerdbl"); + add(0241, "degree"); + add(0254, "dieresis"); + add(0326, "divide"); + add(044, "dollar"); + add(0372, "dotaccent"); + add(0365, "dotlessi"); + add(0145, "e"); + add(0216, "eacute"); + add(0220, "ecircumflex"); + add(0221, "edieresis"); + add(0217, "egrave"); + add(070, "eight"); + add(0311, "ellipsis"); + add(0321, "emdash"); + add(0320, "endash"); + add(075, "equal"); + add(041, "exclam"); + add(0301, "exclamdown"); + add(0146, "f"); + add(0336, "fi"); + add(065, "five"); + add(0337, "fl"); + add(0304, "florin"); + add(064, "four"); + add(0332, "fraction"); + add(0147, "g"); + add(0247, "germandbls"); + add(0140, "grave"); + add(076, "greater"); + add(0307, "guillemotleft"); + add(0310, "guillemotright"); + add(0334, "guilsinglleft"); + add(0335, "guilsinglright"); + add(0150, "h"); + add(0375, "hungarumlaut"); + add(055, "hyphen"); + add(0151, "i"); + add(0222, "iacute"); + add(0224, "icircumflex"); + add(0225, "idieresis"); + add(0223, "igrave"); + add(0152, "j"); + add(0153, "k"); + add(0154, "l"); + add(074, "less"); + add(0302, "logicalnot"); + add(0155, "m"); + add(0370, "macron"); + 
add(0265, "mu"); + add(0156, "n"); + add(071, "nine"); + add(0226, "ntilde"); + add(043, "numbersign"); + add(0157, "o"); + add(0227, "oacute"); + add(0231, "ocircumflex"); + add(0232, "odieresis"); + add(0317, "oe"); + add(0376, "ogonek"); + add(0230, "ograve"); + add(061, "one"); + add(0273, "ordfeminine"); + add(0274, "ordmasculine"); + add(0277, "oslash"); + add(0233, "otilde"); + add(0160, "p"); + add(0246, "paragraph"); + add(050, "parenleft"); + add(051, "parenright"); + add(045, "percent"); + add(056, "period"); + add(0341, "periodcentered"); + add(0344, "perthousand"); + add(053, "plus"); + add(0261, "plusminus"); + add(0161, "q"); + add(077, "question"); + add(0300, "questiondown"); + add(042, "quotedbl"); + add(0343, "quotedblbase"); + add(0322, "quotedblleft"); + add(0323, "quotedblright"); + add(0324, "quoteleft"); + add(0325, "quoteright"); + add(0342, "quotesinglbase"); + add(047, "quotesingle"); + add(0162, "r"); + add(0250, "registered"); + add(0373, "ring"); + add(0163, "s"); + add(0244, "section"); + add(073, "semicolon"); + add(067, "seven"); + add(066, "six"); + add(057, "slash"); + add(040, "space"); + add(0243, "sterling"); + add(0164, "t"); + add(063, "three"); + add(0367, "tilde"); + add(0252, "trademark"); + add(062, "two"); + add(0165, "u"); + add(0234, "uacute"); + add(0236, "ucircumflex"); + add(0237, "udieresis"); + add(0235, "ugrave"); + add(0137, "underscore"); + add(0166, "v"); + add(0167, "w"); + add(0170, "x"); + add(0171, "y"); + add(0330, "ydieresis"); + add(0264, "yen"); + add(0172, "z"); + add(060, "zero"); // adding an additional mapping as defined in Appendix D of the pdf spec - // we must not add it to both mappings as the nameToCode mapping - // wouldn't be unique - codeToName.put(0312, "space"); + add(0312, "space"); } /** Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/PDFDocEncodingCharset.java URL: 
http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/PDFDocEncodingCharset.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/PDFDocEncodingCharset.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/PDFDocEncodingCharset.java Sat Aug 30 02:26:57 2014 @@ -24,10 +24,6 @@ import java.nio.charset.Charset; */ public class PDFDocEncodingCharset extends SingleByteCharset { - - /** Canonical name for the PDFDocEncoding. */ - public static final String NAME = "PDFDocEncoding"; - /** Singleton instance. */ public static final PDFDocEncodingCharset INSTANCE = new PDFDocEncodingCharset(); @@ -36,7 +32,7 @@ public class PDFDocEncodingCharset exten */ public PDFDocEncodingCharset() { - super(NAME, null, createEncoding()); + super("PDFDocEncoding", null, createEncoding()); } private static char[] createEncoding() Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/StandardEncoding.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/StandardEncoding.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/StandardEncoding.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/StandardEncoding.java Sat Aug 30 02:26:57 2014 @@ -40,155 +40,155 @@ public class StandardEncoding extends En */ public StandardEncoding() { - addCharacterEncoding( 0101, "A" ); - addCharacterEncoding( 0341, "AE" ); - addCharacterEncoding( 0102, "B" ); - addCharacterEncoding( 0103, "C" ); - addCharacterEncoding( 0104, "D" ); - addCharacterEncoding( 0105, "E" ); - addCharacterEncoding( 0106, "F" ); - addCharacterEncoding( 0107, "G" ); - addCharacterEncoding( 0110, "H" ); - 
addCharacterEncoding( 0111, "I" ); - addCharacterEncoding( 0112, "J" ); - addCharacterEncoding( 0113, "K" ); - addCharacterEncoding( 0114, "L" ); - addCharacterEncoding( 0350, "Lslash" ); - addCharacterEncoding( 0115, "M" ); - addCharacterEncoding( 0116, "N" ); - addCharacterEncoding( 0117, "O" ); - addCharacterEncoding( 0352, "OE" ); - addCharacterEncoding( 0351, "Oslash" ); - addCharacterEncoding( 0120, "P" ); - addCharacterEncoding( 0121, "Q" ); - addCharacterEncoding( 0122, "R" ); - addCharacterEncoding( 0123, "S" ); - addCharacterEncoding( 0124, "T" ); - addCharacterEncoding( 0125, "U" ); - addCharacterEncoding( 0126, "V" ); - addCharacterEncoding( 0127, "W" ); - addCharacterEncoding( 0130, "X" ); - addCharacterEncoding( 0131, "Y" ); - addCharacterEncoding( 0132, "Z" ); - addCharacterEncoding( 0141, "a" ); - addCharacterEncoding( 0302, "acute" ); - addCharacterEncoding( 0361, "ae" ); - addCharacterEncoding( 0046, "ampersand" ); - addCharacterEncoding( 0136, "asciicircum" ); - addCharacterEncoding( 0176, "asciitilde" ); - addCharacterEncoding( 0052, "asterisk" ); - addCharacterEncoding( 0100, "at" ); - addCharacterEncoding( 0142, "b" ); - addCharacterEncoding( 0134, "backslash" ); - addCharacterEncoding( 0174, "bar" ); - addCharacterEncoding( 0173, "braceleft" ); - addCharacterEncoding( 0175, "braceright" ); - addCharacterEncoding( 0133, "bracketleft" ); - addCharacterEncoding( 0135, "bracketright" ); - addCharacterEncoding( 0306, "breve" ); - addCharacterEncoding( 0267, "bullet" ); - addCharacterEncoding( 0143, "c" ); - addCharacterEncoding( 0317, "caron" ); - addCharacterEncoding( 0313, "cedilla" ); - addCharacterEncoding( 0242, "cent" ); - addCharacterEncoding( 0303, "circumflex" ); - addCharacterEncoding( 0072, "colon" ); - addCharacterEncoding( 0054, "comma" ); - addCharacterEncoding( 0250, "currency" ); - addCharacterEncoding( 0144, "d" ); - addCharacterEncoding( 0262, "dagger" ); - addCharacterEncoding( 0263, "daggerdbl" ); - addCharacterEncoding( 0310, 
"dieresis" ); - addCharacterEncoding( 0044, "dollar" ); - addCharacterEncoding( 0307, "dotaccent" ); - addCharacterEncoding( 0365, "dotlessi" ); - addCharacterEncoding( 0145, "e" ); - addCharacterEncoding( 0070, "eight" ); - addCharacterEncoding( 0274, "ellipsis" ); - addCharacterEncoding( 0320, "emdash" ); - addCharacterEncoding( 0261, "endash" ); - addCharacterEncoding( 0075, "equal" ); - addCharacterEncoding( 0041, "exclam" ); - addCharacterEncoding( 0241, "exclamdown" ); - addCharacterEncoding( 0146, "f" ); - addCharacterEncoding( 0256, "fi" ); - addCharacterEncoding( 0065, "five" ); - addCharacterEncoding( 0257, "fl" ); - addCharacterEncoding( 0246, "florin" ); - addCharacterEncoding( 0064, "four" ); - addCharacterEncoding( 0244, "fraction" ); - addCharacterEncoding( 0147, "g" ); - addCharacterEncoding( 0373, "germandbls" ); - addCharacterEncoding( 0301, "grave" ); - addCharacterEncoding( 0076, "greater" ); - addCharacterEncoding( 0253, "guillemotleft" ); - addCharacterEncoding( 0273, "guillemotright" ); - addCharacterEncoding( 0254, "guilsinglleft" ); - addCharacterEncoding( 0255, "guilsinglright" ); - addCharacterEncoding( 0150, "h" ); - addCharacterEncoding( 0315, "hungarumlaut" ); - addCharacterEncoding( 0055, "hyphen" ); - addCharacterEncoding( 0151, "i" ); - addCharacterEncoding( 0152, "j" ); - addCharacterEncoding( 0153, "k" ); - addCharacterEncoding( 0154, "l" ); - addCharacterEncoding( 0074, "less" ); - addCharacterEncoding( 0370, "lslash" ); - addCharacterEncoding( 0155, "m" ); - addCharacterEncoding( 0305, "macron" ); - addCharacterEncoding( 0156, "n" ); - addCharacterEncoding( 0071, "nine" ); - addCharacterEncoding( 0043, "numbersign" ); - addCharacterEncoding( 0157, "o" ); - addCharacterEncoding( 0372, "oe" ); - addCharacterEncoding( 0316, "ogonek" ); - addCharacterEncoding( 0061, "one" ); - addCharacterEncoding( 0343, "ordfeminine" ); - addCharacterEncoding( 0353, "ordmasculine" ); - addCharacterEncoding( 0371, "oslash" ); - addCharacterEncoding( 
0160, "p" ); - addCharacterEncoding( 0266, "paragraph" ); - addCharacterEncoding( 0050, "parenleft" ); - addCharacterEncoding( 0051, "parenright" ); - addCharacterEncoding( 0045, "percent" ); - addCharacterEncoding( 0056, "period" ); - addCharacterEncoding( 0264, "periodcentered" ); - addCharacterEncoding( 0275, "perthousand" ); - addCharacterEncoding( 0053, "plus" ); - addCharacterEncoding( 0161, "q" ); - addCharacterEncoding( 0077, "question" ); - addCharacterEncoding( 0277, "questiondown" ); - addCharacterEncoding( 0042, "quotedbl" ); - addCharacterEncoding( 0271, "quotedblbase" ); - addCharacterEncoding( 0252, "quotedblleft" ); - addCharacterEncoding( 0272, "quotedblright" ); - addCharacterEncoding( 0140, "quoteleft" ); - addCharacterEncoding( 0047, "quoteright" ); - addCharacterEncoding( 0270, "quotesinglbase" ); - addCharacterEncoding( 0251, "quotesingle" ); - addCharacterEncoding( 0162, "r" ); - addCharacterEncoding( 0312, "ring" ); - addCharacterEncoding( 0163, "s" ); - addCharacterEncoding( 0247, "section" ); - addCharacterEncoding( 0073, "semicolon" ); - addCharacterEncoding( 0067, "seven" ); - addCharacterEncoding( 0066, "six" ); - addCharacterEncoding( 0057, "slash" ); - addCharacterEncoding( 0040, "space" ); - addCharacterEncoding( 0243, "sterling" ); - addCharacterEncoding( 0164, "t" ); - addCharacterEncoding( 0063, "three" ); - addCharacterEncoding( 0304, "tilde" ); - addCharacterEncoding( 0062, "two" ); - addCharacterEncoding( 0165, "u" ); - addCharacterEncoding( 0137, "underscore" ); - addCharacterEncoding( 0166, "v" ); - addCharacterEncoding( 0167, "w" ); - addCharacterEncoding( 0170, "x" ); - addCharacterEncoding( 0171, "y" ); - addCharacterEncoding( 0245, "yen" ); - addCharacterEncoding( 0172, "z" ); - addCharacterEncoding( 0060, "zero" ); + add(0101, "A"); + add(0341, "AE"); + add(0102, "B"); + add(0103, "C"); + add(0104, "D"); + add(0105, "E"); + add(0106, "F"); + add(0107, "G"); + add(0110, "H"); + add(0111, "I"); + add(0112, "J"); + 
add(0113, "K"); + add(0114, "L"); + add(0350, "Lslash"); + add(0115, "M"); + add(0116, "N"); + add(0117, "O"); + add(0352, "OE"); + add(0351, "Oslash"); + add(0120, "P"); + add(0121, "Q"); + add(0122, "R"); + add(0123, "S"); + add(0124, "T"); + add(0125, "U"); + add(0126, "V"); + add(0127, "W"); + add(0130, "X"); + add(0131, "Y"); + add(0132, "Z"); + add(0141, "a"); + add(0302, "acute"); + add(0361, "ae"); + add(0046, "ampersand"); + add(0136, "asciicircum"); + add(0176, "asciitilde"); + add(0052, "asterisk"); + add(0100, "at"); + add(0142, "b"); + add(0134, "backslash"); + add(0174, "bar"); + add(0173, "braceleft"); + add(0175, "braceright"); + add(0133, "bracketleft"); + add(0135, "bracketright"); + add(0306, "breve"); + add(0267, "bullet"); + add(0143, "c"); + add(0317, "caron"); + add(0313, "cedilla"); + add(0242, "cent"); + add(0303, "circumflex"); + add(0072, "colon"); + add(0054, "comma"); + add(0250, "currency"); + add(0144, "d"); + add(0262, "dagger"); + add(0263, "daggerdbl"); + add(0310, "dieresis"); + add(0044, "dollar"); + add(0307, "dotaccent"); + add(0365, "dotlessi"); + add(0145, "e"); + add(0070, "eight"); + add(0274, "ellipsis"); + add(0320, "emdash"); + add(0261, "endash"); + add(0075, "equal"); + add(0041, "exclam"); + add(0241, "exclamdown"); + add(0146, "f"); + add(0256, "fi"); + add(0065, "five"); + add(0257, "fl"); + add(0246, "florin"); + add(0064, "four"); + add(0244, "fraction"); + add(0147, "g"); + add(0373, "germandbls"); + add(0301, "grave"); + add(0076, "greater"); + add(0253, "guillemotleft"); + add(0273, "guillemotright"); + add(0254, "guilsinglleft"); + add(0255, "guilsinglright"); + add(0150, "h"); + add(0315, "hungarumlaut"); + add(0055, "hyphen"); + add(0151, "i"); + add(0152, "j"); + add(0153, "k"); + add(0154, "l"); + add(0074, "less"); + add(0370, "lslash"); + add(0155, "m"); + add(0305, "macron"); + add(0156, "n"); + add(0071, "nine"); + add(0043, "numbersign"); + add(0157, "o"); + add(0372, "oe"); + add(0316, "ogonek"); + 
add(0061, "one"); + add(0343, "ordfeminine"); + add(0353, "ordmasculine"); + add(0371, "oslash"); + add(0160, "p"); + add(0266, "paragraph"); + add(0050, "parenleft"); + add(0051, "parenright"); + add(0045, "percent"); + add(0056, "period"); + add(0264, "periodcentered"); + add(0275, "perthousand"); + add(0053, "plus"); + add(0161, "q"); + add(0077, "question"); + add(0277, "questiondown"); + add(0042, "quotedbl"); + add(0271, "quotedblbase"); + add(0252, "quotedblleft"); + add(0272, "quotedblright"); + add(0140, "quoteleft"); + add(0047, "quoteright"); + add(0270, "quotesinglbase"); + add(0251, "quotesingle"); + add(0162, "r"); + add(0312, "ring"); + add(0163, "s"); + add(0247, "section"); + add(0073, "semicolon"); + add(0067, "seven"); + add(0066, "six"); + add(0057, "slash"); + add(0040, "space"); + add(0243, "sterling"); + add(0164, "t"); + add(0063, "three"); + add(0304, "tilde"); + add(0062, "two"); + add(0165, "u"); + add(0137, "underscore"); + add(0166, "v"); + add(0167, "w"); + add(0170, "x"); + add(0171, "y"); + add(0245, "yen"); + add(0172, "z"); + add(0060, "zero"); } /** Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Type1Encoding.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Type1Encoding.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Type1Encoding.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/Type1Encoding.java Sat Aug 30 02:26:57 2014 @@ -16,28 +16,59 @@ */ package org.apache.pdfbox.encoding; +import org.apache.fontbox.afm.CharMetric; +import org.apache.fontbox.afm.FontMetrics; import org.apache.pdfbox.cos.COSBase; +import java.util.Map; + /** - * This class represents an encoding which was read from a type1 font. - * + * An encoding for a Type 1 font. 
*/ public class Type1Encoding extends Encoding { - public Type1Encoding(int size) + /** + * Creates an encoding from the given FontBox encoding. + * + * @param encoding FontBox encoding + */ + public static Type1Encoding fromFontBox(org.apache.fontbox.encoding.Encoding encoding) { - for (int i=1;i<size;i++) + // todo: could optimise this by looking for specific subclasses + Map<Integer,String> codeToName = encoding.getCodeToNameMap(); + Type1Encoding enc = new Type1Encoding(); + + for (Integer code : codeToName.keySet()) { - addCharacterEncoding(i, NOTDEF); + enc.add(code, codeToName.get(code)); } + + return enc; + } + + /** + * Creates an empty encoding. + */ + public Type1Encoding() + { } /** - * {@inheritDoc} + * Creates an encoding from the given AFM font metrics. + * + * @param fontMetrics AFM font metrics. */ + public Type1Encoding(FontMetrics fontMetrics) + { + for (CharMetric nextMetric : fontMetrics.getCharMetrics()) + { + add(nextMetric.getCharacterCode(), nextMetric.getName()); + } + } + + @Override public COSBase getCOSObject() { return null; } - } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java?rev=1621411&r1=1621410&r2=1621411&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/encoding/WinAnsiEncoding.java Sat Aug 30 02:26:57 2014 @@ -39,232 +39,230 @@ public class WinAnsiEncoding extends Enc */ public WinAnsiEncoding() { - addCharacterEncoding(0101, "A"); - addCharacterEncoding(0306, "AE"); - addCharacterEncoding(0301, "Aacute"); - addCharacterEncoding(0302, "Acircumflex"); - addCharacterEncoding(0304, "Adieresis"); - addCharacterEncoding(0300, "Agrave"); - addCharacterEncoding(0305, "Aring"); - 
addCharacterEncoding(0303, "Atilde"); - addCharacterEncoding(0102, "B"); - addCharacterEncoding(0103, "C"); - addCharacterEncoding(0307, "Ccedilla"); - addCharacterEncoding(0104, "D"); - addCharacterEncoding(0105, "E"); - addCharacterEncoding(0311, "Eacute"); - addCharacterEncoding(0312, "Ecircumflex"); - addCharacterEncoding(0313, "Edieresis"); - addCharacterEncoding(0310, "Egrave"); - addCharacterEncoding(0320, "Eth"); - addCharacterEncoding(0200, "Euro"); - addCharacterEncoding(0106, "F"); - addCharacterEncoding(0107, "G"); - addCharacterEncoding(0110, "H"); - addCharacterEncoding(0111, "I"); - addCharacterEncoding(0315, "Iacute"); - addCharacterEncoding(0316, "Icircumflex"); - addCharacterEncoding(0317, "Idieresis"); - addCharacterEncoding(0314, "Igrave"); - addCharacterEncoding(0112, "J"); - addCharacterEncoding(0113, "K"); - addCharacterEncoding(0114, "L"); - addCharacterEncoding(0115, "M"); - addCharacterEncoding(0116, "N"); - addCharacterEncoding(0321, "Ntilde"); - addCharacterEncoding(0117, "O"); - addCharacterEncoding(0214, "OE"); - addCharacterEncoding(0323, "Oacute"); - addCharacterEncoding(0324, "Ocircumflex"); - addCharacterEncoding(0326, "Odieresis"); - addCharacterEncoding(0322, "Ograve"); - addCharacterEncoding(0330, "Oslash"); - addCharacterEncoding(0325, "Otilde"); - addCharacterEncoding(0120, "P"); - addCharacterEncoding(0121, "Q"); - addCharacterEncoding(0122, "R"); - addCharacterEncoding(0123, "S"); - addCharacterEncoding(0212, "Scaron"); - addCharacterEncoding(0124, "T"); - addCharacterEncoding(0336, "Thorn"); - addCharacterEncoding(0125, "U"); - addCharacterEncoding(0332, "Uacute"); - addCharacterEncoding(0333, "Ucircumflex"); - addCharacterEncoding(0334, "Udieresis"); - addCharacterEncoding(0331, "Ugrave"); - addCharacterEncoding(0126, "V"); - addCharacterEncoding(0127, "W"); - addCharacterEncoding(0130, "X"); - addCharacterEncoding(0131, "Y"); - addCharacterEncoding(0335, "Yacute"); - addCharacterEncoding(0237, "Ydieresis"); - 
addCharacterEncoding(0132, "Z"); - addCharacterEncoding(0216, "Zcaron"); - addCharacterEncoding(0141, "a"); - addCharacterEncoding(0341, "aacute"); - addCharacterEncoding(0342, "acircumflex"); - addCharacterEncoding(0264, "acute"); - addCharacterEncoding(0344, "adieresis"); - addCharacterEncoding(0346, "ae"); - addCharacterEncoding(0340, "agrave"); - addCharacterEncoding(046, "ampersand"); - addCharacterEncoding(0345, "aring"); - addCharacterEncoding(0136, "asciicircum"); - addCharacterEncoding(0176, "asciitilde"); - addCharacterEncoding(052, "asterisk"); - addCharacterEncoding(0100, "at"); - addCharacterEncoding(0343, "atilde"); - addCharacterEncoding(0142, "b"); - addCharacterEncoding(0134, "backslash"); - addCharacterEncoding(0174, "bar"); - addCharacterEncoding(0173, "braceleft"); - addCharacterEncoding(0175, "braceright"); - addCharacterEncoding(0133, "bracketleft"); - addCharacterEncoding(0135, "bracketright"); - addCharacterEncoding(0246, "brokenbar"); - addCharacterEncoding(0225, "bullet"); - addCharacterEncoding(0143, "c"); - addCharacterEncoding(0347, "ccedilla"); - addCharacterEncoding(0270, "cedilla"); - addCharacterEncoding(0242, "cent"); - addCharacterEncoding(0210, "circumflex"); - addCharacterEncoding(072, "colon"); - addCharacterEncoding(054, "comma"); - addCharacterEncoding(0251, "copyright"); - addCharacterEncoding(0244, "currency"); - addCharacterEncoding(0144, "d"); - addCharacterEncoding(0206, "dagger"); - addCharacterEncoding(0207, "daggerdbl"); - addCharacterEncoding(0260, "degree"); - addCharacterEncoding(0250, "dieresis"); - addCharacterEncoding(0367, "divide"); - addCharacterEncoding(044, "dollar"); - addCharacterEncoding(0145, "e"); - addCharacterEncoding(0351, "eacute"); - addCharacterEncoding(0352, "ecircumflex"); - addCharacterEncoding(0353, "edieresis"); - addCharacterEncoding(0350, "egrave"); - addCharacterEncoding(070, "eight"); - addCharacterEncoding(0205, "ellipsis"); - addCharacterEncoding(0227, "emdash"); - 
addCharacterEncoding(0226, "endash"); - addCharacterEncoding(075, "equal"); - addCharacterEncoding(0360, "eth"); - addCharacterEncoding(041, "exclam"); - addCharacterEncoding(0241, "exclamdown"); - addCharacterEncoding(0146, "f"); - addCharacterEncoding(065, "five"); - addCharacterEncoding(0203, "florin"); - addCharacterEncoding(064, "four"); - addCharacterEncoding(0147, "g"); - addCharacterEncoding(0337, "germandbls"); - addCharacterEncoding(0140, "grave"); - addCharacterEncoding(076, "greater"); - addCharacterEncoding(0253, "guillemotleft"); - addCharacterEncoding(0273, "guillemotright"); - addCharacterEncoding(0213, "guilsinglleft"); - addCharacterEncoding(0233, "guilsinglright"); - addCharacterEncoding(0150, "h"); - addCharacterEncoding(055, "hyphen"); - addCharacterEncoding(0151, "i"); - addCharacterEncoding(0355, "iacute"); - addCharacterEncoding(0356, "icircumflex"); - addCharacterEncoding(0357, "idieresis"); - addCharacterEncoding(0354, "igrave"); - addCharacterEncoding(0152, "j"); - addCharacterEncoding(0153, "k"); - addCharacterEncoding(0154, "l"); - addCharacterEncoding(074, "less"); - addCharacterEncoding(0254, "logicalnot"); - addCharacterEncoding(0155, "m"); - addCharacterEncoding(0257, "macron"); - addCharacterEncoding(0265, "mu"); - addCharacterEncoding(0327, "multiply"); - addCharacterEncoding(0156, "n"); - addCharacterEncoding(071, "nine"); - addCharacterEncoding(0361, "ntilde"); - addCharacterEncoding(043, "numbersign"); - addCharacterEncoding(0157, "o"); - addCharacterEncoding(0363, "oacute"); - addCharacterEncoding(0364, "ocircumflex"); - addCharacterEncoding(0366, "odieresis"); - addCharacterEncoding(0234, "oe"); - addCharacterEncoding(0362, "ograve"); - addCharacterEncoding(061, "one"); - addCharacterEncoding(0275, "onehalf"); - addCharacterEncoding(0274, "onequarter"); - addCharacterEncoding(0271, "onesuperior"); - addCharacterEncoding(0252, "ordfeminine"); - addCharacterEncoding(0272, "ordmasculine"); - addCharacterEncoding(0370, "oslash"); 
- addCharacterEncoding(0365, "otilde"); - addCharacterEncoding(0160, "p"); - addCharacterEncoding(0266, "paragraph"); - addCharacterEncoding(050, "parenleft"); - addCharacterEncoding(051, "parenright"); - addCharacterEncoding(045, "percent"); - addCharacterEncoding(056, "period"); - addCharacterEncoding(0267, "periodcentered"); - addCharacterEncoding(0211, "perthousand"); - addCharacterEncoding(053, "plus"); - addCharacterEncoding(0261, "plusminus"); - addCharacterEncoding(0161, "q"); - addCharacterEncoding(077, "question"); - addCharacterEncoding(0277, "questiondown"); - addCharacterEncoding(042, "quotedbl"); - addCharacterEncoding(0204, "quotedblbase"); - addCharacterEncoding(0223, "quotedblleft"); - addCharacterEncoding(0224, "quotedblright"); - addCharacterEncoding(0221, "quoteleft"); - addCharacterEncoding(0222, "quoteright"); - addCharacterEncoding(0202, "quotesinglbase"); - addCharacterEncoding(047, "quotesingle"); - addCharacterEncoding(0162, "r"); - addCharacterEncoding(0256, "registered"); - addCharacterEncoding(0163, "s"); - addCharacterEncoding(0232, "scaron"); - addCharacterEncoding(0247, "section"); - addCharacterEncoding(073, "semicolon"); - addCharacterEncoding(067, "seven"); - addCharacterEncoding(066, "six"); - addCharacterEncoding(057, "slash"); - addCharacterEncoding(040, "space"); - addCharacterEncoding(0243, "sterling"); - addCharacterEncoding(0164, "t"); - addCharacterEncoding(0376, "thorn"); - addCharacterEncoding(063, "three"); - addCharacterEncoding(0276, "threequarters"); - addCharacterEncoding(0263, "threesuperior"); - addCharacterEncoding(0230, "tilde"); - addCharacterEncoding(0231, "trademark"); - addCharacterEncoding(062, "two"); - addCharacterEncoding(0262, "twosuperior"); - addCharacterEncoding(0165, "u"); - addCharacterEncoding(0372, "uacute"); - addCharacterEncoding(0373, "ucircumflex"); - addCharacterEncoding(0374, "udieresis"); - addCharacterEncoding(0371, "ugrave"); - addCharacterEncoding(0137, "underscore"); - 
addCharacterEncoding(0166, "v"); - addCharacterEncoding(0167, "w"); - addCharacterEncoding(0170, "x"); - addCharacterEncoding(0171, "y"); - addCharacterEncoding(0375, "yacute"); - addCharacterEncoding(0377, "ydieresis"); - addCharacterEncoding(0245, "yen"); - addCharacterEncoding(0172, "z"); - addCharacterEncoding(0236, "zcaron"); - addCharacterEncoding(060, "zero"); + add(0101, "A"); + add(0306, "AE"); + add(0301, "Aacute"); + add(0302, "Acircumflex"); + add(0304, "Adieresis"); + add(0300, "Agrave"); + add(0305, "Aring"); + add(0303, "Atilde"); + add(0102, "B"); + add(0103, "C"); + add(0307, "Ccedilla"); + add(0104, "D"); + add(0105, "E"); + add(0311, "Eacute"); + add(0312, "Ecircumflex"); + add(0313, "Edieresis"); + add(0310, "Egrave"); + add(0320, "Eth"); + add(0200, "Euro"); + add(0106, "F"); + add(0107, "G"); + add(0110, "H"); + add(0111, "I"); + add(0315, "Iacute"); + add(0316, "Icircumflex"); + add(0317, "Idieresis"); + add(0314, "Igrave"); + add(0112, "J"); + add(0113, "K"); + add(0114, "L"); + add(0115, "M"); + add(0116, "N"); + add(0321, "Ntilde"); + add(0117, "O"); + add(0214, "OE"); + add(0323, "Oacute"); + add(0324, "Ocircumflex"); + add(0326, "Odieresis"); + add(0322, "Ograve"); + add(0330, "Oslash"); + add(0325, "Otilde"); + add(0120, "P"); + add(0121, "Q"); + add(0122, "R"); + add(0123, "S"); + add(0212, "Scaron"); + add(0124, "T"); + add(0336, "Thorn"); + add(0125, "U"); + add(0332, "Uacute"); + add(0333, "Ucircumflex"); + add(0334, "Udieresis"); + add(0331, "Ugrave"); + add(0126, "V"); + add(0127, "W"); + add(0130, "X"); + add(0131, "Y"); + add(0335, "Yacute"); + add(0237, "Ydieresis"); + add(0132, "Z"); + add(0216, "Zcaron"); + add(0141, "a"); + add(0341, "aacute"); + add(0342, "acircumflex"); + add(0264, "acute"); + add(0344, "adieresis"); + add(0346, "ae"); + add(0340, "agrave"); + add(046, "ampersand"); + add(0345, "aring"); + add(0136, "asciicircum"); + add(0176, "asciitilde"); + add(052, "asterisk"); + add(0100, "at"); + add(0343, "atilde"); 
+ add(0142, "b"); + add(0134, "backslash"); + add(0174, "bar"); + add(0173, "braceleft"); + add(0175, "braceright"); + add(0133, "bracketleft"); + add(0135, "bracketright"); + add(0246, "brokenbar"); + add(0225, "bullet"); + add(0143, "c"); + add(0347, "ccedilla"); + add(0270, "cedilla"); + add(0242, "cent"); + add(0210, "circumflex"); + add(072, "colon"); + add(054, "comma"); + add(0251, "copyright"); + add(0244, "currency"); + add(0144, "d"); + add(0206, "dagger"); + add(0207, "daggerdbl"); + add(0260, "degree"); + add(0250, "dieresis"); + add(0367, "divide"); + add(044, "dollar"); + add(0145, "e"); + add(0351, "eacute"); + add(0352, "ecircumflex"); + add(0353, "edieresis"); + add(0350, "egrave"); + add(070, "eight"); + add(0205, "ellipsis"); + add(0227, "emdash"); + add(0226, "endash"); + add(075, "equal"); + add(0360, "eth"); + add(041, "exclam"); + add(0241, "exclamdown"); + add(0146, "f"); + add(065, "five"); + add(0203, "florin"); + add(064, "four"); + add(0147, "g"); + add(0337, "germandbls"); + add(0140, "grave"); + add(076, "greater"); + add(0253, "guillemotleft"); + add(0273, "guillemotright"); + add(0213, "guilsinglleft"); + add(0233, "guilsinglright"); + add(0150, "h"); + add(055, "hyphen"); + add(0151, "i"); + add(0355, "iacute"); + add(0356, "icircumflex"); + add(0357, "idieresis"); + add(0354, "igrave"); + add(0152, "j"); + add(0153, "k"); + add(0154, "l"); + add(074, "less"); + add(0254, "logicalnot"); + add(0155, "m"); + add(0257, "macron"); + add(0265, "mu"); + add(0327, "multiply"); + add(0156, "n"); + add(071, "nine"); + add(0361, "ntilde"); + add(043, "numbersign"); + add(0157, "o"); + add(0363, "oacute"); + add(0364, "ocircumflex"); + add(0366, "odieresis"); + add(0234, "oe"); + add(0362, "ograve"); + add(061, "one"); + add(0275, "onehalf"); + add(0274, "onequarter"); + add(0271, "onesuperior"); + add(0252, "ordfeminine"); + add(0272, "ordmasculine"); + add(0370, "oslash"); + add(0365, "otilde"); + add(0160, "p"); + add(0266, "paragraph"); + 
add(050, "parenleft"); + add(051, "parenright"); + add(045, "percent"); + add(056, "period"); + add(0267, "periodcentered"); + add(0211, "perthousand"); + add(053, "plus"); + add(0261, "plusminus"); + add(0161, "q"); + add(077, "question"); + add(0277, "questiondown"); + add(042, "quotedbl"); + add(0204, "quotedblbase"); + add(0223, "quotedblleft"); + add(0224, "quotedblright"); + add(0221, "quoteleft"); + add(0222, "quoteright"); + add(0202, "quotesinglbase"); + add(047, "quotesingle"); + add(0162, "r"); + add(0256, "registered"); + add(0163, "s"); + add(0232, "scaron"); + add(0247, "section"); + add(073, "semicolon"); + add(067, "seven"); + add(066, "six"); + add(057, "slash"); + add(040, "space"); + add(0243, "sterling"); + add(0164, "t"); + add(0376, "thorn"); + add(063, "three"); + add(0276, "threequarters"); + add(0263, "threesuperior"); + add(0230, "tilde"); + add(0231, "trademark"); + add(062, "two"); + add(0262, "twosuperior"); + add(0165, "u"); + add(0372, "uacute"); + add(0373, "ucircumflex"); + add(0374, "udieresis"); + add(0371, "ugrave"); + add(0137, "underscore"); + add(0166, "v"); + add(0167, "w"); + add(0170, "x"); + add(0171, "y"); + add(0375, "yacute"); + add(0377, "ydieresis"); + add(0245, "yen"); + add(0172, "z"); + add(0236, "zcaron"); + add(060, "zero"); // adding some additional mappings as defined in Appendix D of the pdf spec - // we must not add them to both mappings as the nameToCode mapping - // wouldn't be unique - codeToName.put(0240, "space"); - codeToName.put(0255, "hyphen"); + add(0240, "space"); + add(0255, "hyphen"); for (int i = 041; i <= 255; i++) { if (!codeToName.containsKey(i)) { - codeToName.put(i, "bullet"); + add(i, "bullet"); } } } Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java?rev=1621411&r1=1621410&r2=1621411&view=diff 
============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/pdmodel/PDResources.java Sat Aug 30 02:26:57 2014 @@ -196,15 +196,26 @@ public class PDResources implements COSO } else { + Map<COSDictionary, PDFont> seenFonts = new HashMap<COSDictionary, PDFont>(); for (COSName fontName : fontsDictionary.keySet()) { COSBase font = fontsDictionary.getDictionaryObject(fontName); + // data-000174.pdf contains a font that is a COSArray, looks to be an error in the // PDF, we will just ignore entries that are not dictionaries. if (font instanceof COSDictionary) { - PDFont newFont = PDFontFactory.createFont((COSDictionary) font); - fonts.put(fontName.getName(), newFont); + // some fonts may appear many times (see test_1fd9a_test.pdf) + if (seenFonts.containsKey(font)) + { + fonts.put(fontName.getName(), seenFonts.get(font)); + } + else + { + PDFont newFont = PDFontFactory.createFont((COSDictionary) font); + fonts.put(fontName.getName(), newFont); + seenFonts.put((COSDictionary) font, newFont); + } } } }