Author: ogrisel
Date: Tue Apr 19 17:00:23 2011
New Revision: 1095133
URL: http://svn.apache.org/viewvc?rev=1095133&view=rev
Log:
STANBOL-176: missing charset when re-decoding the text
Modified:
incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
Modified: incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java
URL: http://svn.apache.org/viewvc/incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java?rev=1095133&r1=1095132&r2=1095133&view=diff
==============================================================================
--- incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java (original)
+++ incubator/stanbol/trunk/enhancer/engines/opennlp-ner/src/main/java/org/apache/stanbol/enhancer/engines/opennlp/impl/NEREngineCore.java Tue Apr 19 17:00:23 2011
@@ -324,7 +324,8 @@ public class NEREngineCore implements En
if (null == text) {
return null;
}
- byte[] bytes = text.getBytes(Charset.forName("UTF-8"));
+ Charset UTF8 = Charset.forName("UTF-8");
+ byte[] bytes = text.getBytes(UTF8);
for (int i = 0; i < bytes.length; i++) {
byte ch = bytes[i];
// remove any characters outside the valid UTF-8 range as well as all control characters
@@ -333,6 +334,6 @@ public class NEREngineCore implements En
bytes[i] = ' ';
}
}
- return new String(bytes);
+ return new String(bytes, UTF8);
}
}