Custom properties on xlsx, docx, pptx -------------------------------------
Key: TIKA-695 URL: https://issues.apache.org/jira/browse/TIKA-695 Project: Tika Issue Type: Bug Components: parser Affects Versions: 1.0 Environment: All OS Reporter: Etienne Jouvin Priority: Minor The parser for Office X files (xlsx, docx, pptx) does not extract custom properties. In the class MetadataExtractor, the method extract retrieves only the core and extended properties. I added the following call to that method, together with the helper method below: extractMetadata(extractor.getCustomProperties(), metadata); /** * Add this method to read custom properties on document. * * @param properties All custom properties. * @param metadata Metadata to complete with read properties. */ private void extractMetadata(CustomProperties properties, Metadata metadata) { org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties propsHolder = properties.getUnderlyingProperties(); String value = null; DateUtils dateUtils = DateUtils.getInstance(); BigDecimal bigDecimal; for (CTProperty property : propsHolder.getPropertyList()) { /* Parse each property */ if (property.isSetLpwstr()) { value = property.getLpwstr(); } else if (property.isSetFiletime()) { value = dateUtils.convertDate(property.getFiletime(), null); } else if (property.isSetDate()) { value = dateUtils.convertDate(property.getDate(), null); } else if (property.isSetDecimal()) { bigDecimal = property.getDecimal(); value = null == bigDecimal ? null : bigDecimal.toString(); } else if (property.isSetBool()) { value = BooleanUtils.toStringTrueFalse(property.getBool()); } else if (property.isSetInt()) { value = Integer.toString(property.getInt()); } else if (property.isSetLpstr()) { value = property.getLpstr(); } else if (property.isSetI4()) { /* Number in Excel for example.... Why i4 ? Ask microsoft. */ value = Integer.toString(property.getI4()); } else { /* For other type, do nothing. */ continue; } /* Add the custom prefix, as done in old office format. */ addProperty(metadata, "custom:" + property.getName(), value); } } -- This message is automatically generated by JIRA. 
For more information on JIRA, see: http://www.atlassian.com/software/jira