Hi, I've just made updated versions of 3 patches against the latest release, 2.3.21, in case they are useful to someone or might be considered for official inclusion.

John

dovecot-2.3.21-tika-http-auth.patch
Allows a username and password to be specified in the fts_tika setting for basic auth against the Tika server. For example:
fts_tika = https://user:password@tika_server:443/tika

dovecot-2.3.21-solr-max-size.patch
This is a simplified version of my previous patch. It sets a size limit (setting fts_max_size) on message bodies that are to be indexed. Bodies of messages larger than fts_max_size are not sent to Solr. Defaults to zero, which means no limit. For example:
fts_max_size = 10M

dovecot-2.3.21-solr-max-rows.patch
When Dovecot sends a search to Solr it uses the rows parameter. For a multiple-mailbox search the value used is SOLR_MAX_MULTI_ROWS, hardcoded to 100000. For a single-mailbox search the value is uidnext. This patch introduces an upper limit for single-mailbox searches using the same value as SOLR_MAX_MULTI_ROWS, while keeping the existing behaviour of sending the uidnext value if it is smaller. This just places a more reasonable upper bound, since uidnext can get much larger.
--- dovecot-2.3.21/src/plugins/fts/fts-parser-tika.c 2023-09-14 15:17:47.000000000 +0200 +++ dovecot-2.3.21-new/src/plugins/fts/fts-parser-tika.c 2023-12-09 11:07:46.436259394 +0100 @@ -57,7 +57,7 @@ tuser = p_new(user->pool, struct fts_parser_tika_user, 1); MODULE_CONTEXT_SET(user, fts_parser_tika_user_module, tuser); - if (http_url_parse(url, NULL, 0, user->pool, + if (http_url_parse(url, NULL, HTTP_URL_ALLOW_USERINFO_PART, user->pool, &tuser->http_url, &error) < 0) { i_error("fts_tika: Failed to parse HTTP url %s: %s", url, error); return -1; @@ -159,6 +159,11 @@ http_url->host.name, t_strconcat(http_url->path, http_url->enc_query, NULL), fts_tika_parser_response, parser); + if (http_url->user != NULL) { + http_client_request_set_auth_simple( + http_req, http_url->user, http_url->password); + } + http_client_request_set_port(http_req, http_url->port); http_client_request_set_ssl(http_req, http_url->have_ssl); if (parser_context->content_type != NULL)
--- dovecot-2.3.21/src/plugins/fts/fts-build-mail.c 2023-09-14 15:17:47.000000000 +0200 +++ dovecot-2.3.21-new/src/plugins/fts/fts-build-mail.c 2023-12-09 11:04:02.205207091 +0100 @@ -17,6 +17,7 @@ #include "fts-filter.h" #include "fts-api-private.h" #include "fts-build-mail.h" +#include "settings-parser.h" /* there are other characters as well, but this doesn't have to be exact */ #define IS_WORD_WHITESPACE(c) \ @@ -573,6 +574,18 @@ bool binary_body; const char *error; int ret; + uoff_t msg_size; + uoff_t fts_max_size = 0; + const char * fts_max_size_setting; + bool oversized_msg; + + fts_max_size_setting = mail_user_plugin_getenv(update_ctx->backend->ns->user, "fts_max_size"); + if (fts_max_size_setting != NULL) { + if (settings_get_size(fts_max_size_setting, &fts_max_size, &error) < 0) { + i_error("%s",error); + fts_max_size = 0; + } + } *may_need_retry_r = FALSE; if (mail_get_stream_because(mail, NULL, NULL, "fts indexing", &input) < 0) { @@ -583,6 +596,14 @@ return -1; } + oversized_msg = FALSE; + i_stream_get_size(input,TRUE,&msg_size); + if (fts_max_size > 0 && msg_size > fts_max_size) { + i_info("Skipping message body indexing because size %"PRIuUOFF_T" exceeds setting fts_max_size %s",msg_size,fts_max_size_setting); + oversized_msg = TRUE; + } + + i_zero(&ctx); ctx.update_ctx = update_ctx; ctx.mail = mail; @@ -640,7 +661,7 @@ message_decoder_set_return_binary(decoder, TRUE); body_part = TRUE; } else { - if (skip_body) + if (skip_body||oversized_msg) continue; } @@ -675,7 +696,7 @@ else (void)fts_parser_deinit(&ctx.body_parser, NULL); } - if (ret == 0 && body_part && !skip_body && !body_added) { + if (ret == 0 && body_part && !skip_body && !oversized_msg && !body_added) { /* make sure body is added even when it doesn't exist */ block.data = NULL; block.size = 0; ret = fts_build_body_block(&ctx, &block, TRUE);
--- dovecot-2.3.21/src/plugins/fts-solr/fts-backend-solr.c 2023-09-14 15:17:47.000000000 +0200 +++ dovecot-2.3.21-new/src/plugins/fts-solr/fts-backend-solr.c 2023-12-09 10:46:52.976808250 +0100 @@ -837,7 +837,7 @@ str = t_str_new(256); str_printfa(str, "wt=xml&fl=uid,score&rows=%u&sort=uid+asc&q=%%7b!lucene+q.op%%3dAND%%7d", - status.uidnext); + I_MIN(status.uidnext,SOLR_MAX_MULTI_ROWS)); prefix_len = str_len(str); if (solr_add_definite_query_args(str, args, and_args)) {
_______________________________________________ dovecot mailing list -- dovecot@dovecot.org To unsubscribe send an email to dovecot-le...@dovecot.org