Author: mikemccand
Date: Tue Nov 8 19:07:07 2011
New Revision: 1199387
URL: http://svn.apache.org/viewvc?rev=1199387&view=rev
Log:
TIKA-529: don't allocate byte[] for each byte when detecting IBM420 charset
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java
Modified:
tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java
URL:
http://svn.apache.org/viewvc/tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java?rev=1199387&r1=1199386&r2=1199387&view=diff
==============================================================================
--- tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java (original)
+++ tika/trunk/tika-parsers/src/main/java/org/apache/tika/parser/txt/CharsetRecog_sbcs.java Tue Nov 8 19:07:07 2011
@@ -1297,11 +1297,12 @@ abstract class CharsetRecog_sbcs extends
}
private boolean isLamAlef(byte b) {
- byte shapedLamAlef[] = {(byte)0xb2,(byte)0xb3,(byte)0xb4,(byte)0xb5,(byte)0xb7,(byte)0xb8 };
- for (int i = 0; i<shapedLamAlef.length; i++)
- if (b == shapedLamAlef[i])
- return true;
- return false;
+ // Return true if byte is any of these:
+ //
+ //   {(byte)0xb2,(byte)0xb3,(byte)0xb4,(byte)0xb5,(byte)0xb7,(byte)0xb8}
+ //
+ // NOTE: 0xb2 is -78; 0xb8 is -72:
+ return (b <= (byte)0xb8) && (b >= (byte)0xb2) && (b != (byte)0xb6);
}
protected void matchFinish(CharsetDetector det) {