My first patch inadvertently set the wrong tag type; this revised patch corrects that.
---- As per the thread starting at http://mail.gnome.org/archives/tracker-list/2012-April/msg00012.html, here is a proposed patch that simplifies (and improves) the indexing of OASIS text files (.odt files). With this patch, a lot more of the content of a typical file saved by LibreOffice gets indexed, so such files are far more likely to show up in searches. Karl --- tracker-0.14.0.orig/src/tracker-extract/tracker-extract-oasis.c 2012-04-09 13:31:04.132949981 +0100 +++ tracker-0.14.0/src/tracker-extract/tracker-extract-oasis.c 2012-04-09 19:13:15.553943645 +0100 @@ -59,7 +59,6 @@ typedef struct { typedef struct { ODTTagType current; - gboolean styles_present; ODTFileType file_type; GString *content; gulong bytes_pending; @@ -128,7 +127,6 @@ extract_oasis_content (const gchar /* Create parse info */ info.current = ODT_TAG_TYPE_UNKNOWN; info.file_type = file_type; - info.styles_present = FALSE; info.content = g_string_new (""); info.bytes_pending = total_bytes; @@ -391,45 +389,12 @@ xml_start_element_handler_content (GMark switch (data->file_type) { case FILE_TYPE_ODT: - if ((g_ascii_strcasecmp (element_name, "text:table-of-content") == 0) || - (g_ascii_strcasecmp (element_name, "text:table-index") == 0) || - (g_ascii_strcasecmp (element_name, "text:illustration-index") == 0) || - (g_ascii_strcasecmp (element_name, "text:section") == 0)) { - data->styles_present = TRUE; - } else if (g_ascii_strcasecmp (element_name, "table:table-cell") == 0) { - data->current = ODT_TAG_TYPE_WORD_TEXT; - } else if (g_ascii_strcasecmp (element_name, "text:p") == 0) { - if (data->styles_present) { - data->current = ODT_TAG_TYPE_WORD_TEXT; - break; - } - - for (a = attribute_names, v = attribute_values; *a; ++a, ++v) { - if (g_ascii_strcasecmp (*a, "text:style-name") != 0) { - continue; - } - - if ((g_ascii_strcasecmp (*v, "title-article") == 0) || - (g_ascii_strcasecmp (*v, "para-padding") == 0) || - (g_ascii_strcasecmp (*v, "para-screen") == 0)) { - data->current = ODT_TAG_TYPE_WORD_TEXT; - 
} - } - } else if (g_ascii_strcasecmp (element_name, "text:h") == 0) { - for (a = attribute_names, v = attribute_values; *a; ++a, ++v) { - if (g_ascii_strcasecmp (*a, "text:style-name") != 0) { - continue; - } - - if (g_ascii_strncasecmp (*v, "Heading", 7) == 0) { - data->current = ODT_TAG_TYPE_WORD_TEXT; - } - } - } else if (g_ascii_strcasecmp (element_name, "text:span") == 0) { - data->current = ODT_TAG_TYPE_WORD_TEXT; - } else if ((g_ascii_strcasecmp (element_name, "text:a") == 0) || - (g_ascii_strcasecmp (element_name, "text:s") == 0)) { - data->current = ODT_TAG_TYPE_WORD_TEXT; + if ((g_ascii_strcasecmp (element_name, "text:p") == 0) || + (g_ascii_strcasecmp (element_name, "text:h") == 0) || + (g_ascii_strcasecmp (element_name, "text:a") == 0) || + (g_ascii_strcasecmp (element_name, "text:span") == 0) || + (g_ascii_strcasecmp (element_name, "table:table-cell")) == 0) { + data->current = ODT_TAG_TYPE_WORD_TEXT; } else { data->current = -1; } @@ -461,23 +426,8 @@ xml_end_element_handler_content (GMarkup { ODTContentParseInfo *data = user_data; - switch (data->file_type) { - case FILE_TYPE_ODT: - if ((g_ascii_strcasecmp (element_name, "text:table-of-content") == 0) || - (g_ascii_strcasecmp (element_name, "text:table-index") == 0) || - (g_ascii_strcasecmp (element_name, "text:illustration-index") == 0) || - (g_ascii_strcasecmp (element_name, "text:section") == 0)) { - data->styles_present = FALSE; - } - break; - default: - break; - } + data->current = -1; - if ((g_ascii_strcasecmp (element_name, "text:a") != 0) && - (g_ascii_strcasecmp (element_name, "text:s") != 0)) { - data->current = -1; - } } static void _______________________________________________ tracker-list mailing list tracker-list@gnome.org http://mail.gnome.org/mailman/listinfo/tracker-list