Author: lehmi
Date: Mon May 12 18:38:54 2014
New Revision: 1594047
URL: http://svn.apache.org/r1594047
Log: PDFBOX-2072: fix the calculation of space char width based on a proposal by Juraj Lonc
Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Modified: pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java URL: http://svn.apache.org/viewvc/pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java?rev=1594047&r1=1594046&r2=1594047&view=diff ============================================================================== --- pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java (original) +++ pdfbox/branches/1.8/pdfbox/src/main/java/org/apache/pdfbox/util/PDFStreamEngine.java Mon May 12 18:38:54 2014 @@ -366,7 +366,7 @@ public class PDFStreamEngine // so lets make it a little bit smaller. spaceWidthText *= .80f; } - else + if (spaceWidthText == 0) { spaceWidthText = 1.0f; // if could not find font, use a generic value } @@ -404,7 +404,7 @@ public class PDFStreamEngine // the space width has to be transformed into display units float spaceWidthDisp = spaceWidthText * fontSizeText * horizontalScalingText - * textMatrix.getValue(0, 0) * ctm.getValue(0, 0); + * textMatrix.getXScale() * ctm.getXScale(); //todo, handle horizontal displacement // get the width and height of this character in text units @@ -443,7 +443,7 @@ public class PDFStreamEngine { spacingText += wordSpacingText; } - textXctm = textMatrix.multiply(ctm, textXctm); + textMatrix.multiply(ctm, textXctm); // Convert textMatrix to display units // We need to instantiate a new Matrix instance here as it is passed to the TextPosition constructor below. 
Matrix textMatrixStart = textStateParameters.multiply(textXctm); @@ -462,8 +462,8 @@ public class PDFStreamEngine // textMatrixEnd contains the coordinates of the end of the last glyph without // taking characterSpacingText and spacintText into account, otherwise it'll be // impossible to detect new words within text extraction - tempMatrix = textStateParameters.multiply(td, tempMatrix); - textMatrixEnd = tempMatrix.multiply(textXctm, textMatrixEnd); + textStateParameters.multiply(td, tempMatrix); + tempMatrix.multiply(textXctm, textMatrixEnd); final float endXPosition = textMatrixEnd.getXPosition(); final float endYPosition = textMatrixEnd.getYPosition(); @@ -471,7 +471,7 @@ public class PDFStreamEngine tx = ((characterHorizontalDisplacementText)*fontSizeText+characterSpacingText+spacingText) *horizontalScalingText; td.setValue( 2, 0, tx ); - textMatrix = td.multiply(textMatrix, textMatrix ); + td.multiply(textMatrix, textMatrix ); // determine the width of this character // XXX: Note that if we handled vertical text, we should be using Y here