[ https://issues.apache.org/jira/browse/TIKA-695?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Etienne Jouvin updated TIKA-695: -------------------------------- Description: Parser on office Xfiles does not get custom properties. In class MetadataExtractor, method extract, only core and extended properties are retrieved. I added something like this: extractMetadata(extractor.getCustomProperties(), metadata); {quote} /** * Add this method to read custom properties on document. * * @param properties All custom properties. * @param metadata Metadata to complete with read properties. */ private void extractMetadata(CustomProperties properties, Metadata metadata) { org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties propsHolder = properties.getUnderlyingProperties(); String value = null; DateUtils dateUtils = DateUtils.getInstance(); BigDecimal bigDecimal; for (CTProperty property : propsHolder.getPropertyList()) { /* Parse each property */ if (property.isSetLpwstr()) { value = property.getLpwstr(); } else if (property.isSetFiletime()) { value = dateUtils.convertDate(property.getFiletime(), null); } else if (property.isSetDate()) { value = dateUtils.convertDate(property.getDate(), null); } else if (property.isSetDecimal()) { bigDecimal = property.getDecimal(); value = null == bigDecimal ? null : bigDecimal.toString(); } else if (property.isSetBool()) { value = BooleanUtils.toStringTrueFalse(property.getBool()); } else if (property.isSetInt()) { value = Integer.toString(property.getInt()); } else if (property.isSetLpstr()) { value = property.getLpstr(); } else if (property.isSetI4()) { /* Number in Excel for example.... Why i4 ? Ask microsoft. */ value = Integer.toString(property.getI4()); } else { /* For other type, do nothing. */ continue; } /* Add the custom prefix, as done in old office format. */ addProperty(metadata, "custom:" + property.getName(), value); } } {quote} was: Parser on office Xfiles does not get custom properties. In class MetadataExtractor, method extract, only core and extended properties are retrieved. 
I added something like this: extractMetadata(extractor.getCustomProperties(), metadata); /** * Add this method to read custom properties on document. * * @param properties All custom properties. * @param metadata Metadata to complete with read properties. */ private void extractMetadata(CustomProperties properties, Metadata metadata) { org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties propsHolder = properties.getUnderlyingProperties(); String value = null; DateUtils dateUtils = DateUtils.getInstance(); BigDecimal bigDecimal; for (CTProperty property : propsHolder.getPropertyList()) { /* Parse each property */ if (property.isSetLpwstr()) { value = property.getLpwstr(); } else if (property.isSetFiletime()) { value = dateUtils.convertDate(property.getFiletime(), null); } else if (property.isSetDate()) { value = dateUtils.convertDate(property.getDate(), null); } else if (property.isSetDecimal()) { bigDecimal = property.getDecimal(); value = null == bigDecimal ? null : bigDecimal.toString(); } else if (property.isSetBool()) { value = BooleanUtils.toStringTrueFalse(property.getBool()); } else if (property.isSetInt()) { value = Integer.toString(property.getInt()); } else if (property.isSetLpstr()) { value = property.getLpstr(); } else if (property.isSetI4()) { /* Number in Excel for example.... Why i4 ? Ask microsoft. */ value = Integer.toString(property.getI4()); } else { /* For other type, do nothing. */ continue; } /* Add the custom prefix, as done in old office format. */ addProperty(metadata, "custom:" + property.getName(), value); } } > Custom properties on xlsx, docx, pptx > ------------------------------------- > > Key: TIKA-695 > URL: https://issues.apache.org/jira/browse/TIKA-695 > Project: Tika > Issue Type: Bug > Components: parser > Affects Versions: 1.0 > Environment: All OS > Reporter: Etienne Jouvin > Priority: Minor > > Parser on office Xfiles do not get custom properties. 
> In class MetadataExtractor, method extract, only core and extended properties > are retrieved. > I added something like this: > extractMetadata(extractor.getCustomProperties(), metadata); > {quote} > /** > * Add this method to read custom properties on document. > * > * @param properties All custom properties. > * @param metadata Metadata to complete with read properties. > */ > private void extractMetadata(CustomProperties properties, Metadata > metadata) { > > org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties > propsHolder = properties.getUnderlyingProperties(); > String value = null; > DateUtils dateUtils = DateUtils.getInstance(); > BigDecimal bigDecimal; > for (CTProperty property : propsHolder.getPropertyList()) { > /* Parse each property */ > if (property.isSetLpwstr()) { > value = property.getLpwstr(); > } else if (property.isSetFiletime()) { > value = > dateUtils.convertDate(property.getFiletime(), null); > } else if (property.isSetDate()) { > value = > dateUtils.convertDate(property.getDate(), null); > } else if (property.isSetDecimal()) { > bigDecimal = property.getDecimal(); > value = null == bigDecimal ? null : > bigDecimal.toString(); > } else if (property.isSetBool()) { > value = > BooleanUtils.toStringTrueFalse(property.getBool()); > } else if (property.isSetInt()) { > value = Integer.toString(property.getInt()); > } else if (property.isSetLpstr()) { > value = property.getLpstr(); > } else if (property.isSetI4()) { > /* Number in Excel for example.... Why i4 ? Ask > microsoft. */ > value = Integer.toString(property.getI4()); > } else { > /* For other type, do nothing. */ > continue; > } > /* Add the custom prefix, as done in old office format. > */ > addProperty(metadata, "custom:" + property.getName(), > value); > } > } > {quote} -- This message is automatically generated by JIRA. For more information on JIRA, see: http://www.atlassian.com/software/jira