Hi team,

Is somebody picking up the review of this patch by Karl?

PS: I'm very busy lately with a variety of things, but if nobody else does, I
can look into this in a few weeks. Hopefully, for the sake of Karl's efforts,
somebody else will do a review before that.

Kind regards,

Philip

On Sat, 2012-04-14 at 13:44 +0100, Karl Relton wrote:
> My first patch inadvertently set the wrong tag type - this patch has that 
> corrected.
> 
> ----
> 
> As per thread starting at
> http://mail.gnome.org/archives/tracker-list/2012-April/msg00012.html
> 
> here is a proposed patch that simplifies (and improves) the indexing of
> oasis text files (.odt files). With this patch you get a lot more of the
> content indexed on a typical file saved by LibreOffice, and so such files are
> far more likely to show up in searches.
> 
> Karl
> 
> --- tracker-0.14.0.orig/src/tracker-extract/tracker-extract-oasis.c   
> 2012-04-09 13:31:04.132949981 +0100
> +++ tracker-0.14.0/src/tracker-extract/tracker-extract-oasis.c        
> 2012-04-09 19:13:15.553943645 +0100
> @@ -59,7 +59,6 @@ typedef struct {
>  
>  typedef struct {
>       ODTTagType current;
> -     gboolean styles_present;
>       ODTFileType file_type;
>       GString *content;
>       gulong bytes_pending;
> @@ -128,7 +127,6 @@ extract_oasis_content (const gchar
>       /* Create parse info */
>       info.current = ODT_TAG_TYPE_UNKNOWN;
>       info.file_type = file_type;
> -     info.styles_present = FALSE;
>       info.content = g_string_new ("");
>       info.bytes_pending = total_bytes;
>  
> @@ -391,45 +389,12 @@ xml_start_element_handler_content (GMark
>  
>       switch (data->file_type) {
>       case FILE_TYPE_ODT:
> -             if ((g_ascii_strcasecmp (element_name, "text:table-of-content") 
> == 0) ||
> -                 (g_ascii_strcasecmp (element_name, "text:table-index") == 
> 0) ||
> -                 (g_ascii_strcasecmp (element_name, 
> "text:illustration-index") == 0) ||
> -                 (g_ascii_strcasecmp (element_name, "text:section") == 0)) {
> -                     data->styles_present = TRUE;
> -             } else if (g_ascii_strcasecmp (element_name, 
> "table:table-cell") == 0) {
> -                     data->current = ODT_TAG_TYPE_WORD_TEXT;
> -             } else if (g_ascii_strcasecmp (element_name, "text:p") == 0) {
> -                     if (data->styles_present) {
> -                             data->current = ODT_TAG_TYPE_WORD_TEXT;
> -                             break;
> -                     }
> -
> -                     for (a = attribute_names, v = attribute_values; *a; 
> ++a, ++v) {
> -                             if (g_ascii_strcasecmp (*a, "text:style-name") 
> != 0) {
> -                                     continue;
> -                             }
> -
> -                             if ((g_ascii_strcasecmp (*v, "title-article") 
> == 0) ||
> -                                 (g_ascii_strcasecmp (*v, "para-padding") == 
> 0) ||
> -                                 (g_ascii_strcasecmp (*v, "para-screen") == 
> 0)) {
> -                                     data->current = ODT_TAG_TYPE_WORD_TEXT;
> -                             }
> -                     }
> -             } else if (g_ascii_strcasecmp (element_name, "text:h") == 0) {
> -                     for (a = attribute_names, v = attribute_values; *a; 
> ++a, ++v) {
> -                             if (g_ascii_strcasecmp (*a, "text:style-name") 
> != 0) {
> -                                     continue;
> -                             }
> -
> -                             if (g_ascii_strncasecmp (*v, "Heading", 7) == 
> 0) {
> -                                     data->current = ODT_TAG_TYPE_WORD_TEXT;
> -                             }
> -                     }
> -             } else if (g_ascii_strcasecmp (element_name, "text:span") == 0) 
> {
> -                     data->current = ODT_TAG_TYPE_WORD_TEXT;
> -             } else if ((g_ascii_strcasecmp (element_name, "text:a") == 0) ||
> -                        (g_ascii_strcasecmp (element_name, "text:s") == 0)) {
> -                     data->current = ODT_TAG_TYPE_WORD_TEXT;
> +             if ((g_ascii_strcasecmp (element_name, "text:p") == 0) ||
> +                 (g_ascii_strcasecmp (element_name, "text:h") == 0) ||
> +                 (g_ascii_strcasecmp (element_name, "text:a") == 0) ||
> +                 (g_ascii_strcasecmp (element_name, "text:span") == 0) ||
> +                 (g_ascii_strcasecmp (element_name, "table:table-cell")) == 
> 0) {
> +                     data->current = ODT_TAG_TYPE_WORD_TEXT;
>               } else {
>                       data->current = -1;
>               }
> @@ -461,23 +426,8 @@ xml_end_element_handler_content (GMarkup
>  {
>       ODTContentParseInfo *data = user_data;
>  
> -     switch (data->file_type) {
> -     case FILE_TYPE_ODT:
> -             if ((g_ascii_strcasecmp (element_name, "text:table-of-content") 
> == 0) ||
> -                 (g_ascii_strcasecmp (element_name, "text:table-index") == 
> 0) ||
> -                 (g_ascii_strcasecmp (element_name, 
> "text:illustration-index") == 0) ||
> -                 (g_ascii_strcasecmp (element_name, "text:section") == 0)) {
> -                     data->styles_present = FALSE;
> -             }
> -             break;
> -     default:
> -             break;
> -     }
> +     data->current = -1;
>  
> -     if ((g_ascii_strcasecmp (element_name, "text:a") != 0) &&
> -         (g_ascii_strcasecmp (element_name, "text:s") != 0)) {
> -             data->current = -1;
> -     }
>  }
>  
>  static void
> 
> 
> 
> _______________________________________________
> tracker-list mailing list
> tracker-list@gnome.org
> http://mail.gnome.org/mailman/listinfo/tracker-list
> 

-- 


Philip Van Hoof
Software developer
Codeminded BVBA - http://codeminded.be

_______________________________________________
tracker-list mailing list
tracker-list@gnome.org
https://mail.gnome.org/mailman/listinfo/tracker-list

Reply via email to