Awakening the dead...
On 6/29/07, jamie <[EMAIL PROTECTED]> wrote:
we only index the first 10,000 unique words in any one doc and/or only
the first 1 MB of text.
I believe the maximum text size limit can be adjusted in the config file
but the word limit is hardcoded (which needs to be changed to use a
config var). Note these settings greatly affect memory usage of trackerd
when indexing.
So, here's a patch that adds support for the config key MaxWordsToIndex.
Its value is stored in tracker->max_words_to_index and defaults to the
macro MAX_WORDS_TO_INDEX (10000) defined in tracker-utils.h.
The key is also added to the default config written by trackerd.
I will look at adding support for these in the tracker-preferences UI in
the near future.
So, as before, no tracker-preferences support from me...
Like max_index_text_length, max_words_to_index is now also sanitized
to 0 if insane (i.e. negative) ;)
I hope it is OK to add yet another field to the Tracker structure.
Kind regards, Marcus
diff --git a/src/trackerd/tracker-parser.c b/src/trackerd/tracker-parser.c
index 802e824..56eb9c5 100644
--- a/src/trackerd/tracker-parser.c
+++ b/src/trackerd/tracker-parser.c
@@ -554,7 +554,7 @@ tracker_parse_text (GHashTable *word_table, const char *txt, int weight, gboolea
total_words++;
- if (total_words < 10000) {
+ if (total_words < tracker->max_words_to_index) {
count = GPOINTER_TO_INT (g_hash_table_lookup (word_table, index_word));
g_hash_table_insert (word_table, index_word, GINT_TO_POINTER (count + weight));
@@ -589,7 +589,7 @@ tracker_parse_text (GHashTable *word_table, const char *txt, int weight, gboolea
total_words++;
- if (total_words < 10000) {
+ if (total_words < tracker->max_words_to_index) {
count = GPOINTER_TO_INT (g_hash_table_lookup (word_table, word));
diff --git a/src/trackerd/tracker-utils.c b/src/trackerd/tracker-utils.c
index b5f56d5..37779a9 100644
--- a/src/trackerd/tracker-utils.c
+++ b/src/trackerd/tracker-utils.c
@@ -2303,6 +2303,8 @@ tracker_load_config_file ()
"[Performance]\n",
"# Maximum size of text in bytes to index from a file's text contents\n",
"MaxTextToIndex=1048576\n",
+ "# Maximum number of unique words to index from a file's text contents\n",
+ "MaxWordsToIndex=10000\n",
"# Specifies the no of entities to index before determining whether to perform index optimization\n",
"OptimizationSweepCount=10000\n",
"# Sets the maximum bucket count for the indexer\n",
@@ -2488,6 +2490,10 @@ tracker_load_config_file ()
}
+ if (g_key_file_has_key (key_file, "Performance", "MaxWordsToIndex", NULL)) {
+ tracker->max_words_to_index = g_key_file_get_integer (key_file, "Performance", "MaxWordsToIndex", NULL);
+ }
+
g_free (filename);
diff --git a/src/trackerd/tracker-utils.h b/src/trackerd/tracker-utils.h
index 83e4142..f20350e 100644
--- a/src/trackerd/tracker-utils.h
+++ b/src/trackerd/tracker-utils.h
@@ -50,7 +50,8 @@ extern char *tracker_actions[];
#define MAX_INDEX_TEXT_LENGTH 1048576
#define MAX_PROCESS_QUEUE_SIZE 100
#define MAX_EXTRACT_QUEUE_SIZE 500
-#define OPTIMIZATION_COUNT 10000
+#define OPTIMIZATION_COUNT 10000
+#define MAX_WORDS_TO_INDEX 10000
/* default indexer options */
#define MIN_INDEX_BUCKET_COUNT 65536 /* minimum bucket number of word index per division (total buckets = INDEXBNUM * INDEXDIV) */
@@ -215,6 +216,7 @@ typedef struct {
int battery_throttle;
gboolean use_extra_memory;
int initial_sleep;
+ int max_words_to_index;
/* indexing options */
int max_index_bucket_count;
diff --git a/src/trackerd/trackerd.c b/src/trackerd/trackerd.c
index e7232e4..3138ff0 100644
--- a/src/trackerd/trackerd.c
+++ b/src/trackerd/trackerd.c
@@ -1794,6 +1794,7 @@ set_defaults ()
tracker->max_process_queue_size = MAX_PROCESS_QUEUE_SIZE;
tracker->max_extract_queue_size = MAX_EXTRACT_QUEUE_SIZE;
tracker->optimization_count = OPTIMIZATION_COUNT;
+ tracker->max_words_to_index = MAX_WORDS_TO_INDEX;
tracker->max_index_bucket_count = MAX_INDEX_BUCKET_COUNT;
tracker->min_index_bucket_count = MIN_INDEX_BUCKET_COUNT;
@@ -1860,6 +1861,7 @@ sanity_check_option_values ()
if (tracker->max_index_text_length < 0) tracker->max_index_text_length = 0;
+ if (tracker->max_words_to_index < 0) tracker->max_words_to_index = 0;
if (tracker->optimization_count < 1000) tracker->optimization_count = 1000;
if (tracker->max_index_bucket_count < 1000) tracker->max_index_bucket_count= 1000;
if (tracker->min_index_bucket_count < 1000) tracker->min_index_bucket_count= 1000;
_______________________________________________
tracker-list mailing list
tracker-list@gnome.org
http://mail.gnome.org/mailman/listinfo/tracker-list