[ 
https://issues.apache.org/jira/browse/TIKA-695?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

Etienne Jouvin updated TIKA-695:
--------------------------------

    Description: 
The parser for Office X files (xlsx, docx, pptx) does not extract custom properties.

In class MetadataExtractor, method extract, only core and extended properties 
are retrieved.
I added something like this:

extractMetadata(extractor.getCustomProperties(), metadata);

{quote}
        /**
         * Copies the custom properties of a document into the given metadata.
         * <p>
         * Every property of a supported type (lpwstr, filetime, date, decimal,
         * bool, int, lpstr, i4) is converted to a string and handed to
         * addProperty under its own name prefixed with "custom:". Properties of
         * any other type are skipped.
         *
         * @param properties Custom properties read from the document.
         * @param metadata Metadata that receives the converted properties.
         */
        private void extractMetadata(CustomProperties properties, Metadata metadata) {
                final org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties underlying =
                                properties.getUnderlyingProperties();
                final DateUtils dates = DateUtils.getInstance();

                for (CTProperty property : underlying.getPropertyList()) {
                        /* String form of the current property; assigned by exactly one branch below. */
                        final String text;

                        if (property.isSetLpwstr()) {
                                text = property.getLpwstr();
                        } else if (property.isSetFiletime()) {
                                text = dates.convertDate(property.getFiletime(), null);
                        } else if (property.isSetDate()) {
                                text = dates.convertDate(property.getDate(), null);
                        } else if (property.isSetDecimal()) {
                                final BigDecimal decimal = property.getDecimal();
                                if (decimal == null) {
                                        text = null;
                                } else {
                                        text = decimal.toString();
                                }
                        } else if (property.isSetBool()) {
                                text = BooleanUtils.toStringTrueFalse(property.getBool());
                        } else if (property.isSetInt()) {
                                text = Integer.toString(property.getInt());
                        } else if (property.isSetLpstr()) {
                                text = property.getLpstr();
                        } else if (property.isSetI4()) {
                                /* Numbers entered in Excel, for example, are stored as i4. */
                                text = Integer.toString(property.getI4());
                        } else {
                                /* Unsupported property type: nothing to add. */
                                continue;
                        }

                        /* Same "custom:" prefix as the one used for the old office format. */
                        final String key = "custom:" + property.getName();
                        addProperty(metadata, key, text);
                }
        }
{quote}


  was:
The parser for Office X files (xlsx, docx, pptx) does not extract custom properties.

In class MetadataExtractor, method extract, only core and extended properties 
are retrieved.
I added something like this:

extractMetadata(extractor.getCustomProperties(), metadata);

        /**
         * Copies the custom properties of a document into the given metadata.
         * <p>
         * Every property of a supported type (lpwstr, filetime, date, decimal,
         * bool, int, lpstr, i4) is converted to a string and handed to
         * addProperty under its own name prefixed with "custom:". Properties of
         * any other type are skipped.
         *
         * @param properties Custom properties read from the document.
         * @param metadata Metadata that receives the converted properties.
         */
        private void extractMetadata(CustomProperties properties, Metadata metadata) {
                final org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties underlying =
                                properties.getUnderlyingProperties();
                final DateUtils dates = DateUtils.getInstance();

                for (CTProperty property : underlying.getPropertyList()) {
                        /* String form of the current property; assigned by exactly one branch below. */
                        final String text;

                        if (property.isSetLpwstr()) {
                                text = property.getLpwstr();
                        } else if (property.isSetFiletime()) {
                                text = dates.convertDate(property.getFiletime(), null);
                        } else if (property.isSetDate()) {
                                text = dates.convertDate(property.getDate(), null);
                        } else if (property.isSetDecimal()) {
                                final BigDecimal decimal = property.getDecimal();
                                if (decimal == null) {
                                        text = null;
                                } else {
                                        text = decimal.toString();
                                }
                        } else if (property.isSetBool()) {
                                text = BooleanUtils.toStringTrueFalse(property.getBool());
                        } else if (property.isSetInt()) {
                                text = Integer.toString(property.getInt());
                        } else if (property.isSetLpstr()) {
                                text = property.getLpstr();
                        } else if (property.isSetI4()) {
                                /* Numbers entered in Excel, for example, are stored as i4. */
                                text = Integer.toString(property.getI4());
                        } else {
                                /* Unsupported property type: nothing to add. */
                                continue;
                        }

                        /* Same "custom:" prefix as the one used for the old office format. */
                        final String key = "custom:" + property.getName();
                        addProperty(metadata, key, text);
                }
        }



> Custom properties on xlsx, docx, pptx
> -------------------------------------
>
>                 Key: TIKA-695
>                 URL: https://issues.apache.org/jira/browse/TIKA-695
>             Project: Tika
>          Issue Type: Bug
>          Components: parser
>    Affects Versions: 1.0
>         Environment: All OS
>            Reporter: Etienne Jouvin
>            Priority: Minor
>
> The parser for Office X files (xlsx, docx, pptx) does not extract custom properties.
> In class MetadataExtractor, method extract, only core and extended properties 
> are retrieved.
> I added something like this:
> extractMetadata(extractor.getCustomProperties(), metadata);
> {quote}
>       /**
>        * Reads the custom properties of a document and adds them to the metadata.
>        * 
>        * Each property of a supported type (lpwstr, filetime, date, decimal, bool,
>        * int, lpstr, i4) is converted to a string and passed to addProperty under
>        * its own name prefixed with "custom:". Other property types are skipped.
>        * 
>        * @param properties All custom properties.
>        * @param metadata Metadata to complete with read properties.
>        */
>       private void extractMetadata(CustomProperties properties, Metadata 
> metadata) {
>               
> org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties 
> propsHolder = properties.getUnderlyingProperties();
>               String value = null;
>               DateUtils dateUtils = DateUtils.getInstance();
>               BigDecimal bigDecimal;
>               for (CTProperty property : propsHolder.getPropertyList()) {
>                       /* Convert the property to a string according to its type. */
>                       if (property.isSetLpwstr()) {
>                               value = property.getLpwstr();
>                       } else if (property.isSetFiletime()) {
>                               value = 
> dateUtils.convertDate(property.getFiletime(), null);
>                       } else if (property.isSetDate()) {
>                               value = 
> dateUtils.convertDate(property.getDate(), null);
>                       } else if (property.isSetDecimal()) {
>                               bigDecimal = property.getDecimal();
>                               value = null == bigDecimal ? null : 
> bigDecimal.toString();
>                       } else if (property.isSetBool()) {
>                               value = 
> BooleanUtils.toStringTrueFalse(property.getBool());
>                       } else if (property.isSetInt()) {
>                               value = Integer.toString(property.getInt());
>                       } else if (property.isSetLpstr()) {
>                               value = property.getLpstr();
>                       } else if (property.isSetI4()) {
>                               /* Number in Excel for example.... Why i4 ? Ask 
> microsoft. */
>                               value = Integer.toString(property.getI4());
>                       } else {
>                               /* Other types are not handled: skip this property. */
>                               continue;
>                       }
>                       /* Add the custom prefix, as done in old office format. 
> */
>                       addProperty(metadata, "custom:" + property.getName(), 
> value);
>               }
>       }
> {quote}

--
This message is automatically generated by JIRA.
For more information on JIRA, see: http://www.atlassian.com/software/jira

        

Reply via email to