Author: lehmi Date: Sun Feb 20 18:00:28 2011 New Revision: 1072665 URL: http://svn.apache.org/viewvc?rev=1072665&view=rev Log: PDFBOX-956: undoing the change committed in 1070125 to avoid java 1.6 dependency
Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java URL: http://svn.apache.org/viewvc/pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java?rev=1072665&r1=1072664&r2=1072665&view=diff ============================================================================== --- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java (original) +++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/util/PDFTextStripper.java Sun Feb 20 18:00:28 2011 @@ -26,11 +26,7 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.NavigableMap; -import java.util.NavigableSet; import java.util.Properties; -import java.util.TreeMap; -import java.util.TreeSet; import java.util.Vector; import java.util.regex.Pattern; @@ -159,8 +155,7 @@ public class PDFTextStripper extends PDF */ protected Vector<List<TextPosition>> charactersByArticle = new Vector<List<TextPosition>>(); - private Map<String, NavigableMap<Float, NavigableSet<Float>>> characterListMapping = - new HashMap<String, NavigableMap<Float, NavigableSet<Float>>>(); + private Map<String, List<TextPosition>> characterListMapping = new HashMap<String, List<TextPosition>>(); /** * encoding that text will be written in (or null). @@ -885,10 +880,10 @@ public class PDFTextStripper extends PDF String textCharacter = text.getCharacter(); float textX = text.getX(); float textY = text.getY(); - NavigableMap<Float, NavigableSet<Float>> sameTextCharacters = characterListMapping.get( textCharacter ); + List<TextPosition> sameTextCharacters = (List<TextPosition>)characterListMapping.get( textCharacter ); if( sameTextCharacters == null ) { - sameTextCharacters = new TreeMap<Float, NavigableSet<Float>>(); + sameTextCharacters = new ArrayList<TextPosition>(); characterListMapping.put( textCharacter, sameTextCharacters ); } @@ -905,29 +900,27 @@ public class PDFTextStripper extends PDF // boolean suppressCharacter = false; float tolerance = (text.getWidth()/textCharacter.length())/3.0f; - - NavigableMap<Float, NavigableSet<Float>> xMatches = - sameTextCharacters.subMap(textX - tolerance , false, textX + tolerance , false); - for (NavigableSet<Float> xMatch : xMatches.values()) - { - NavigableSet<Float> yMatches = - xMatch.subSet(textY - tolerance , false, textY + tolerance , false); - if (!yMatches.isEmpty()) + for( int i=0; i<sameTextCharacters.size() && textCharacter != null; i++ ) + { + TextPosition character = sameTextCharacters.get( i ); + String charCharacter = character.getCharacter(); + float charX = character.getX(); + float charY = character.getY(); + //only want to suppress + + if( charCharacter != null && + //charCharacter.equals( textCharacter ) && + within( charX, textX, tolerance ) && + within( charY, + textY, + tolerance ) ) { suppressCharacter = true; - break; } } - if( !suppressCharacter ) { - NavigableSet<Float> ySet = sameTextCharacters.get(textX); - if (ySet == null) - { - ySet = new TreeSet<Float>(); - sameTextCharacters.put( textX, ySet ); - } - ySet.add( textY ); + sameTextCharacters.add( text ); showCharacter = true; } }