Hi team, is somebody picking up the patch review of this stuff by Karl?
PS: I'm very busy lately with a variety of things, but if nobody is, I can look into this in a few weeks. Hopefully, for the sake of Karl's efforts, somebody else will do a review before then. Kind regards, Philip On Sat, 2012-04-14 at 13:44 +0100, Karl Relton wrote: > My first patch inadvertently set the wrong tag type - this patch has that > corrected. > > ---- > > As per thread starting at > http://mail.gnome.org/archives/tracker-list/2012-April/msg00012.html > > here is a proposed patch that simplifies (and improves) the indexing of > oasis text files (.odt files). With this patch you get a lot more of the > content indexed on a typical file saved by Libreoffice, and so they are > far more likely to show up in searches. > > Karl > > --- tracker-0.14.0.orig/src/tracker-extract/tracker-extract-oasis.c > 2012-04-09 13:31:04.132949981 +0100 > +++ tracker-0.14.0/src/tracker-extract/tracker-extract-oasis.c > 2012-04-09 19:13:15.553943645 +0100 > @@ -59,7 +59,6 @@ typedef struct { > > typedef struct { > ODTTagType current; > - gboolean styles_present; > ODTFileType file_type; > GString *content; > gulong bytes_pending; > @@ -128,7 +127,6 @@ extract_oasis_content (const gchar > /* Create parse info */ > info.current = ODT_TAG_TYPE_UNKNOWN; > info.file_type = file_type; > - info.styles_present = FALSE; > info.content = g_string_new (""); > info.bytes_pending = total_bytes; > > @@ -391,45 +389,12 @@ xml_start_element_handler_content (GMark > > switch (data->file_type) { > case FILE_TYPE_ODT: > - if ((g_ascii_strcasecmp (element_name, "text:table-of-content") > == 0) || > - (g_ascii_strcasecmp (element_name, "text:table-index") == > 0) || > - (g_ascii_strcasecmp (element_name, > "text:illustration-index") == 0) || > - (g_ascii_strcasecmp (element_name, "text:section") == 0)) { > - data->styles_present = TRUE; > - } else if (g_ascii_strcasecmp (element_name, > "table:table-cell") == 0) { > - data->current = ODT_TAG_TYPE_WORD_TEXT; > - } else if (g_ascii_strcasecmp (element_name, "text:p") == 
0) { > - if (data->styles_present) { > - data->current = ODT_TAG_TYPE_WORD_TEXT; > - break; > - } > - > - for (a = attribute_names, v = attribute_values; *a; > ++a, ++v) { > - if (g_ascii_strcasecmp (*a, "text:style-name") > != 0) { > - continue; > - } > - > - if ((g_ascii_strcasecmp (*v, "title-article") > == 0) || > - (g_ascii_strcasecmp (*v, "para-padding") == > 0) || > - (g_ascii_strcasecmp (*v, "para-screen") == > 0)) { > - data->current = ODT_TAG_TYPE_WORD_TEXT; > - } > - } > - } else if (g_ascii_strcasecmp (element_name, "text:h") == 0) { > - for (a = attribute_names, v = attribute_values; *a; > ++a, ++v) { > - if (g_ascii_strcasecmp (*a, "text:style-name") > != 0) { > - continue; > - } > - > - if (g_ascii_strncasecmp (*v, "Heading", 7) == > 0) { > - data->current = ODT_TAG_TYPE_WORD_TEXT; > - } > - } > - } else if (g_ascii_strcasecmp (element_name, "text:span") == 0) > { > - data->current = ODT_TAG_TYPE_WORD_TEXT; > - } else if ((g_ascii_strcasecmp (element_name, "text:a") == 0) || > - (g_ascii_strcasecmp (element_name, "text:s") == 0)) { > - data->current = ODT_TAG_TYPE_WORD_TEXT; > + if ((g_ascii_strcasecmp (element_name, "text:p") == 0) || > + (g_ascii_strcasecmp (element_name, "text:h") == 0) || > + (g_ascii_strcasecmp (element_name, "text:a") == 0) || > + (g_ascii_strcasecmp (element_name, "text:span") == 0) || > + (g_ascii_strcasecmp (element_name, "table:table-cell")) == > 0) { > + data->current = ODT_TAG_TYPE_WORD_TEXT; > } else { > data->current = -1; > } > @@ -461,23 +426,8 @@ xml_end_element_handler_content (GMarkup > { > ODTContentParseInfo *data = user_data; > > - switch (data->file_type) { > - case FILE_TYPE_ODT: > - if ((g_ascii_strcasecmp (element_name, "text:table-of-content") > == 0) || > - (g_ascii_strcasecmp (element_name, "text:table-index") == > 0) || > - (g_ascii_strcasecmp (element_name, > "text:illustration-index") == 0) || > - (g_ascii_strcasecmp (element_name, "text:section") == 0)) { > - data->styles_present = FALSE; > - } > - 
break; > - default: > - break; > - } > + data->current = -1; > > - if ((g_ascii_strcasecmp (element_name, "text:a") != 0) && > - (g_ascii_strcasecmp (element_name, "text:s") != 0)) { > - data->current = -1; > - } > } > > static void > > > > _______________________________________________ > tracker-list mailing list > tracker-list@gnome.org > http://mail.gnome.org/mailman/listinfo/tracker-list > -- Philip Van Hoof Software developer Codeminded BVBA - http://codeminded.be _______________________________________________ tracker-list mailing list tracker-list@gnome.org https://mail.gnome.org/mailman/listinfo/tracker-list