pjfanning commented on code in PR #1044:
URL: https://github.com/apache/poi/pull/1044#discussion_r3052679259
##########
poi-scratchpad/src/test/java/org/apache/poi/hwpf/TestHWPFParser.java:
##########
@@ -39,6 +44,47 @@ void testDoc() throws Exception {
}
}
+ /**
+ * Test reading a real-world .doc file.
+ * This test now handles non-standard formatting that WPS/Word can open.
+ */
+ @Test
+ void testDocRead() throws Exception {
+ try (
+ InputStream stream =
HWPFTestDataSamples.openSampleFileStream("issue_1041.doc");
+ HWPFDocument doc = HWPFParser.parse(stream)
+ ) {
+ WordExtractor extractor = new WordExtractor(doc);
+ String text = extractor.getText();
+ assertNotNull(doc);
+ assertNotNull(text);
+ }
+ }
+
+ @Test
+ void testWpsDocByFs()throws Exception{
+ POIDataSamples instance = POIDataSamples.getDocumentInstance();
+ File file = instance.getFile("issue_1041.doc");
+ POIFSFileSystem fs = new POIFSFileSystem(file);
+ WordExtractor extractor = new WordExtractor(fs);
+ String text = extractor.getText();
+ assertNotNull(text);
+ }
+
+ @Test
+ void testOffice97_2003DocRead() throws Exception {
+ try (
+ InputStream stream =
HWPFTestDataSamples.openSampleFileStream("issue_1041_2.doc");
+ HWPFDocument doc = HWPFParser.parse(stream)
+ ) {
+ WordExtractor extractor = new WordExtractor(doc);
+ String text = extractor.getText();
+ assertNotNull(doc);
+ assertNotNull(text);
Review Comment:
The test must assert on the actual extracted text; checking that it is not null is not enough.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]