Package: release.debian.org Severity: normal User: release.debian....@packages.debian.org Usertags: unblock
Please unblock package dovecot-fts-xapian.

This version (1.4.9a) fixes a number of important bugs in the indexer, including:
  + fix indexing of attachments, closes: #985654
  + fix indexing of accented characters
  + fix memory errors / segfaults when indexing large mailboxes

The source debdiff from 1.4.7-1 (currently in testing) to 1.4.9a-1 is attached here. Please let me know when it is approved so that I can upload to unstable.

unblock dovecot-fts-xapian/1.4.9a-1

Thanks,
--Joe
diffstat for dovecot-fts-xapian-1.4.7 dovecot-fts-xapian-1.4.9a .gitignore | 65 ++++++++++ Makefile.am | 4 PACKAGES/RPM/README.md | 20 +++ PACKAGES/RPM/fts-xapian.spec | 41 ++++++ README.md | 46 +++++-- configure.ac | 2 debian/changelog | 11 + debian/watch | 4 fts-xapian-config.h.in | 2 src/fts-backend-xapian-functions.cpp | 175 +++++++++++++++++++++-------- src/fts-backend-xapian.cpp | 211 ++++++++++++++++++----------------- src/fts-xapian-plugin.c | 2 src/fts-xapian-plugin.h | 9 - 13 files changed, 425 insertions(+), 167 deletions(-) diff -Nru -w dovecot-fts-xapian-1.4.7/.gitignore dovecot-fts-xapian-1.4.9a/.gitignore --- dovecot-fts-xapian-1.4.7/.gitignore 1969-12-31 19:00:00.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/.gitignore 2021-04-24 16:27:55.000000000 -0400 @@ -0,0 +1,65 @@ +# http://www.gnu.org/software/automake + +Makefile.in +/ar-lib +/mdate-sh +/py-compile +/test-driver +/ylwrap +.deps/ +.dirstamp + +# http://www.gnu.org/software/autoconf + +autom4te.cache +/autoscan.log +/autoscan-*.log +/aclocal.m4 +/compile +/config.guess +/config.h.in +/config.log +/config.status +/config.sub +/configure +/configure.scan +/depcomp +/install-sh +/missing +/stamp-h1 +/stamp-h2 +/stamp.h + +# https://www.gnu.org/software/libtool/ + +/ltmain.sh +/libtool + +# http://www.gnu.org/software/texinfo + +/texinfo.tex + +# http://www.gnu.org/software/m4/ + +m4/libtool.m4 +m4/ltoptions.m4 +m4/ltsugar.m4 +m4/ltversion.m4 +m4/lt~obsolete.m4 + +# Generated Makefile +# (meta build system like autotools, +# can automatically generate from config.status script +# (which is called by configure script)) +Makefile + +/dummy-config.h +/dummy-config.h.in +/fts-xapian-config.h +/run-test.sh + +src/*.o +src/*.lo +src/*.la + +src/.libs/** diff -Nru -w dovecot-fts-xapian-1.4.7/Makefile.am dovecot-fts-xapian-1.4.9a/Makefile.am --- dovecot-fts-xapian-1.4.7/Makefile.am 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/Makefile.am 2021-04-24 16:27:55.000000000 -0400 @@ -2,5 
+2,5 @@ ACLOCAL_AMFLAGS = -I m4 -PACKAGE_VERSION = "1.4.7" -VERSION = "1.4.7" +PACKAGE_VERSION = "1.4.9a" +VERSION = "1.4.9a" diff -Nru -w dovecot-fts-xapian-1.4.7/PACKAGES/RPM/README.md dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/README.md --- dovecot-fts-xapian-1.4.7/PACKAGES/RPM/README.md 1969-12-31 19:00:00.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/README.md 2021-04-24 16:27:55.000000000 -0400 @@ -0,0 +1,20 @@ +As root: + + Install the development environment and required devel packages: + -- dnf groupinstall "Development Tools" + -- dnf install rpm-build rpm-devel rpmlint make coreutils diffutils patch rpmdevtools + -- dnf install dovecot-devel dovecot libicu-devel icu xapian-core xapian-core-devel + +As a normal user: + + Create the ~/rpmbuild tree as a normal user (never build rpms as root): + -- rpmdev-setuptree + Place the spec file under: + ~/rpmbuild/SPECS/fts-xapian.spec + Place the tar.gz sources under: + ~/rpmbuild/SOURCES/fts-xapian-1.4.9a.tar.gz + Generate the binary rpm with: + -- QA_RPATHS=$(( 0x0001|0x0010 )) rpmbuild -bb ~/rpmbuild/SPECS/fts-xapian.spec + +Your RPM packages will be under ~/rpmbuild/RPMS/x86_64/ + diff -Nru -w dovecot-fts-xapian-1.4.7/PACKAGES/RPM/fts-xapian.spec dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/fts-xapian.spec --- dovecot-fts-xapian-1.4.7/PACKAGES/RPM/fts-xapian.spec 1969-12-31 19:00:00.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/PACKAGES/RPM/fts-xapian.spec 2021-04-24 16:27:55.000000000 -0400 @@ -0,0 +1,41 @@ +Name: fts-xapian +Version: 1.4.9a +Release: 1%{?dist} +Summary: Dovecot FTS plugin based on Xapian + +License: LGPL-2.1 +URL: https://github.com/grosjo/fts-xapian +Source0: fts-xapian-1.4.9a.tar.gz + +BuildRequires: xapian-core-devel, libicu-devel, dovecot-devel +Requires: xapian-core, xapian-core-libs, dovecot + +%description +This project intends to provide a straightforward, simple and maintenance free, way to configure FTS plugin for Dovecot, leveraging the efforts by the Xapian.org team. 
+ +This effort came after Dovecot team decided to deprecate "fts_squat" included in the dovecot core, and due to the complexity of the Solr plugin capabilitles, un-needed for most users. + + +%prep +%autosetup +autoreconf -vi +./configure --with-dovecot=/usr/lib64/dovecot + + +%build +make %{?_smp_mflags} + + +%install +%make_install + + +%files +/usr/lib64/dovecot/lib21_fts_xapian_plugin.la +/usr/lib64/dovecot/lib21_fts_xapian_plugin.so +/usr/lib64/dovecot/lib21_fts_xapian_plugin.a + + +%changelog +* Tue Apr 6 2021 xapian +- diff -Nru -w dovecot-fts-xapian-1.4.7/README.md dovecot-fts-xapian-1.4.9a/README.md --- dovecot-fts-xapian-1.4.7/README.md 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/README.md 2021-04-24 16:27:55.000000000 -0400 @@ -79,12 +79,14 @@ plugin = fts fts_xapian (...) fts = xapian - fts_xapian = partial=3 full=20 attachments=0 verbose=0 + fts_xapian = partial=3 full=20 verbose=0 fts_autoindex = yes fts_enforced = yes fts_autoindex_exclude = \Trash + + fts_decoder = decode2text // To index attachements (...) } @@ -92,27 +94,48 @@ service indexer-worker { vsz_limit = 2G // or above (or 0 if you have rather large memory usable on your server, which is preferred for performance) } + +service decode2text { + executable = script /usr/libexec/dovecot/decode2text.sh + user = dovecot + unix_listener decode2text { + mode = 0666 + } +} (...) ``` -Partial & full parameters : 3 and 20 are the NGram values for header fields, which means the keywords created for fields (To, Cc, ...) are between 3 and 20 chars long. -Full words are also added by default (if not longer than 245 chars, which is the limit of Xapian capability). 
+ +Indexing options +---------------- + +| Option | Description | Possible values | Default value | +|----------------|--------------------------------|--------------------------------------|---------------| +| partial & full | NGram values for header fields | between 3 and 20 characters | 3 & 20 | +| verbose | Logs verbosity | 0 (silent), 1 (verbose) or 2 (debug) | 0 | + +NGrams details +-------------- + +The partial & full parameters are the NGram values for header fields, which means the keywords created for fields (To, +Cc, ...) are between 3 and 20 chars long. Full words are also added by default (if not longer than 245 chars, which is +the limit of Xapian capability). Example: "<john@doe>" will create joh, ohn, hn@, ..., john@d, ohn@do, ..., and finally john@doe as searchable keywords. -Set "verbose=1" to see verbose messages in the log, "verbose=2" for debug -Set "attachments=1" if you want to index attachments (this works only for text attachments) +Index updating +-------------- -Restart Dovecot: +Just restart Dovecot: -``` +```sh sudo servicectl restart dovecot ``` -If this is not a fresh install of dovecot, you need to re-index your mailboxes +If this is not a fresh install of dovecot, you need to re-index your mailboxes: -``` +```sh doveadm index -A -q \* ``` @@ -121,7 +144,8 @@ You shall put in a cron the following command (for daily run for instance) : -``` + +```sh doveadm fts optimize -A ``` @@ -131,4 +155,6 @@ Please submit requests/bugs via the [GitHub issue tracker](https://github.com/grosjo/fts-xapian/issues). 
+A Matrix Room exists also at : #xapian-dovecot:grosjo.net + Thanks to Aki Tuomi <aki.tu...@open-xchange.com>, Stephan Bosch <step...@rename-it.nl>, Paul Hecker <p...@iwascoding.com> diff -Nru -w dovecot-fts-xapian-1.4.7/configure.ac dovecot-fts-xapian-1.4.9a/configure.ac --- dovecot-fts-xapian-1.4.7/configure.ac 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/configure.ac 2021-04-24 16:27:55.000000000 -0400 @@ -1,4 +1,4 @@ -AC_INIT([Dovecot FTS Xapian], [1.4.7], [j...@grosjo.net], [dovecot-fts-xapian]) +AC_INIT([Dovecot FTS Xapian], [1.4.9a], [j...@grosjo.net], [dovecot-fts-xapian]) AC_CONFIG_AUX_DIR([.]) AC_CONFIG_SRCDIR([src]) AC_CONFIG_MACRO_DIR([m4]) diff -Nru -w dovecot-fts-xapian-1.4.7/debian/changelog dovecot-fts-xapian-1.4.9a/debian/changelog --- dovecot-fts-xapian-1.4.7/debian/changelog 2021-01-31 21:35:02.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/debian/changelog 2021-06-22 22:40:48.000000000 -0400 @@ -1,3 +1,14 @@ +dovecot-fts-xapian (1.4.9a-1) unstable; urgency=medium + + * [2da6c89] d/watch: allow non-numbers in version + * [18c496d] New upstream version 1.4.9a + + fix indexing of attachments, closes: #985654 + + fix indexing of accented characters + + fix memory errors / segfaults when indexing large mailboxes + + handle indexing of virtual folders + + -- Joseph Nahmias <je...@debian.org> Tue, 22 Jun 2021 22:40:48 -0400 + dovecot-fts-xapian (1.4.7-1) unstable; urgency=medium * [e3bdb1b] fix VCS paths diff -Nru -w dovecot-fts-xapian-1.4.7/debian/watch dovecot-fts-xapian-1.4.9a/debian/watch --- dovecot-fts-xapian-1.4.7/debian/watch 2020-10-18 12:23:30.000000000 -0400 +++ dovecot-fts-xapian-1.4.9a/debian/watch 2021-06-22 22:10:41.000000000 -0400 @@ -3,6 +3,6 @@ version=4 -opts="filenamemangle=s%(?:.*?)?v?(\d[\d.]*)\.tar\.gz%dovecot-fts-xapian-$1.tar.gz%" \ +opts="filenamemangle=s%(?:.*?)?v?(\d.*)\.tar\.gz%dovecot-fts-xapian-$1.tar.gz%" \ https://github.com/grosjo/fts-xapian/tags \ - (?:.*?/)?v?(\d[\d.]*)\.tar\.gz + 
(?:.*?/)?v?(\d.*)\.tar\.gz diff -Nru -w dovecot-fts-xapian-1.4.7/fts-xapian-config.h.in dovecot-fts-xapian-1.4.9a/fts-xapian-config.h.in --- dovecot-fts-xapian-1.4.7/fts-xapian-config.h.in 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/fts-xapian-config.h.in 2021-04-24 16:27:55.000000000 -0400 @@ -1,2 +1,2 @@ #define FTS_XAPIAN_NAME "Dovecot FTS Xapian" -#define FTS_XAPIAN_VERSION "1.4.7" +#define FTS_XAPIAN_VERSION "1.4.9a" diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-backend-xapian-functions.cpp dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian-functions.cpp --- dovecot-fts-xapian-1.4.7/src/fts-backend-xapian-functions.cpp 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian-functions.cpp 2021-04-24 16:27:55.000000000 -0400 @@ -26,6 +26,8 @@ class XQuerySet { + private: + icu::Transliterator *accentsConverter; public: char * header; char * text; @@ -44,6 +46,7 @@ header=NULL; text=NULL; global_neg=false; + accentsConverter=NULL; } XQuerySet(bool is_and, bool is_neg, long l) @@ -55,6 +58,7 @@ text=NULL; global_and=is_and; global_neg=is_neg; + accentsConverter=NULL; } ~XQuerySet() @@ -68,6 +72,7 @@ } if(qsize>0) i_free(qs); qsize=0; qs=NULL; + if(accentsConverter != NULL) delete(accentsConverter); } void add(const char * h,const char * t) @@ -80,11 +85,8 @@ if(h==NULL) return; if(t==NULL) return; - icu::StringPiece sp_h(h); - icu::UnicodeString h2 = icu::UnicodeString::fromUTF8(sp_h); - - icu::StringPiece sp_t(t); - icu::UnicodeString t2 = icu::UnicodeString::fromUTF8(sp_t); + icu::UnicodeString h2 = icu::UnicodeString::fromUTF8(icu::StringPiece(h)); + icu::UnicodeString t2 = icu::UnicodeString::fromUTF8(icu::StringPiece(t)); add(&h2,&t2,is_neg); } @@ -154,6 +156,19 @@ std::string tmp1; h->toUTF8String(tmp1); char * h2 = i_strdup(tmp1.c_str()); + + if(accentsConverter == NULL) + { + UErrorCode status = U_ZERO_ERROR; + accentsConverter = icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", 
UTRANS_FORWARD, status); + if(U_FAILURE(status)) + { + i_error("FTS Xapian: Can not allocate ICU translator (2)"); + accentsConverter = NULL; + } + } + if(accentsConverter != NULL) accentsConverter->transliterate(*t); + std::string tmp2; t->toUTF8String(tmp2); char * t2 = i_strdup(tmp2.c_str()); @@ -178,7 +193,7 @@ } if(i>=HDRS_NB) { - i_error("FTS Xapian: Unknown header (lookup) '%s'",h2); + if(verbose>1) i_error("FTS Xapian: Unknown header (lookup) '%s'",h2); i_free(h2); i_free(t2); return; } @@ -299,8 +314,6 @@ char *s = i_strdup(get_string().c_str()); - if(verbose>0) { i_info("FTS Xapian: Query= %s",s); } - qp->set_database(*db); Xapian::Query * q = new Xapian::Query(qp->parse_query(s,Xapian::QueryParser::FLAG_DEFAULT));// | Xapian::QueryParser::FLAG_PARTIAL)); @@ -317,6 +330,7 @@ long partial,full,hardlimit; const char * prefix; bool onlyone; + icu::Transliterator *accentsConverter; public: char ** data; @@ -334,6 +348,7 @@ hardlimit=XAPIAN_TERM_SIZELIMIT-strlen(prefix); onlyone=false; if(strcmp(prefix,"XMID")==0) onlyone=true; + accentsConverter = NULL; } ~XNGram() @@ -348,14 +363,14 @@ i_free(data); } data=NULL; + if(accentsConverter != NULL) delete(accentsConverter); } void add(const char * s) { if(s==NULL) return; - icu::StringPiece sp(s); - icu::UnicodeString d = icu::UnicodeString::fromUTF8(sp); + icu::UnicodeString d = icu::UnicodeString::fromUTF8(icu::StringPiece(s)); add(&d); } @@ -401,6 +416,18 @@ long l = d->length(); if(l<partial) return; + if(accentsConverter == NULL) + { + UErrorCode status = U_ZERO_ERROR; + accentsConverter = icu::Transliterator::createInstance("NFD; [:M:] Remove; NFC", UTRANS_FORWARD, status); + if(U_FAILURE(status)) + { + i_error("FTS Xapian: Can not allocate ICU translator (1)"); + accentsConverter = NULL; + } + } + if(accentsConverter != NULL) accentsConverter->transliterate(*d); + if(onlyone) { add_stem(d); @@ -470,6 +497,13 @@ } }; +static long fts_backend_xapian_current_time() +{ + struct timeval tp; + gettimeofday(&tp, 
NULL); + return tp.tv_sec * 1000 + tp.tv_usec / 1000; +} + static long fts_backend_xapian_memory_used() // KB { FILE* file = fopen("/proc/self/status", "r"); @@ -522,7 +556,7 @@ return 0; } -static bool fts_backend_xapian_test_memory() +static bool fts_backend_xapian_test_memory(struct xapian_fts_backend *backend, long add) { rlim_t limit; @@ -531,15 +565,23 @@ long used = fts_backend_xapian_memory_used(); long fri = fts_backend_xapian_memory_free(); // Free RAM + backend->nb_pushes++; + long m2 = 2*used/backend->nb_pushes; + if(backend->max_push < m2) backend->max_push=m2; + m2=backend->max_push; + + add = long(add/1024.0); + if(m<1) { - if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB, Free = %ld MB",long(used/1024),long(fri/1024)); - return (fri>used/2); + if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB (%ld pushes), Free = %ld MB, Additional data %ld KB, Estimated required = %ld MB",long(used/1024), backend->nb_pushes, long(fri/1024), add, long(m2/1024)); + return ((fri>XAPIAN_MIN_RAM*1024)&&(fri>m2)); + } + else + { + if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB (%ld%%) (%ld pushes), Limit = %ld MB, Free = %ld MB, Additional data %ld KB, Estimated required = %ld MB",long(used/1024),long(used*100.0/m),backend->nb_pushes,long(m/1024),long(fri/1024), add, long(m2/1024)); + return ((fri>XAPIAN_MIN_RAM*1024)&&(m>(used+m2))&&(fri>m2)); } - - if(verbose>0) i_info("FTS Xapian: Memory stats : Used = %ld MB (%ld%%), Limit = %ld MB, Free = %ld MB",long(used/1024),long(used*100.0/m),long(m/1024),long(fri/1024)); - - return ((m>used*3.0/2)&&(fri>used/2)); } static bool fts_backend_xapian_open_readonly(struct xapian_fts_backend *backend, Xapian::Database ** dbr) @@ -598,9 +640,7 @@ if(backend->old_guid != NULL) { /* Performance calculator*/ - struct timeval tp; - gettimeofday(&tp, NULL); - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - backend->perf_dt; + long dt = fts_backend_xapian_current_time() - backend->perf_dt; double 
r=0; if(dt>0) { @@ -618,8 +658,12 @@ static void fts_backend_xapian_release(struct xapian_fts_backend *backend, const char * reason, long commit_time) { + bool err=false; + if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_release (%s)",reason); + if(commit_time<1) commit_time = fts_backend_xapian_current_time(); + if(backend->dbw !=NULL) { try @@ -630,6 +674,7 @@ catch(Xapian::Error e) { i_error("FTS Xapian: %s : %s - %s",reason,e.get_type(),e.get_error_string()); + err=true; } delete(backend->dbw); backend->dbw = NULL; @@ -637,12 +682,27 @@ backend->commit_time = commit_time; } + if(err) + { + if(verbose>0) i_info("FTS Xapian: Re-creating index database due to error"); + try + { + Xapian::WritableDatabase * db = new Xapian::WritableDatabase(backend->db,Xapian::DB_CREATE_OR_OVERWRITE | Xapian::DB_RETRY_LOCK | Xapian::DB_BACKEND_GLASS); + db->close(); + delete(db); + } + catch(Xapian::Error e) + { + i_error("FTS Xapian: Can't re-create Xapian DB (%s) %s : %s - %s",backend->boxname,backend->db,e.get_type(),e.get_error_string()); + } + } + + backend->nb_pushes=0; + backend->max_push=0; + if(verbose>0) { - struct timeval tp; - gettimeofday(&tp, NULL); - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; - i_info("FTS Xapian: Committed '%s' in %ld ms",reason,current_time - commit_time); + i_info("FTS Xapian: Committed '%s' in %ld ms",reason,fts_backend_xapian_current_time() - commit_time); } } @@ -689,9 +749,7 @@ { Xapian::WritableDatabase * dbw; - struct timeval tp; - gettimeofday(&tp, NULL); - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000; + long dt = fts_backend_xapian_current_time(); try { @@ -747,21 +805,18 @@ dbw->commit(); dbw->close(); delete(dbw); - gettimeofday(&tp, NULL); - dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - dt; + + dt = fts_backend_xapian_current_time() - dt; i_info("FTS Xapian: Expunging '%s' done in %.2f secs",fpath,dt/1000.0); } static int fts_backend_xapian_unset_box(struct xapian_fts_backend *backend) { - if(verbose>1) i_info("FTS 
Xapian: Unset box '%s' (%s)",backend->boxname,backend->guid); + if(verbose>0) i_info("FTS Xapian: Unset box '%s' (%s)",backend->boxname,backend->guid); - struct timeval tp; - gettimeofday(&tp, NULL); - long commit_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; + long commit_time = fts_backend_xapian_current_time(); fts_backend_xapian_oldbox(backend); - fts_backend_xapian_release(backend,"unset_box",commit_time); if(backend->db != NULL) @@ -779,19 +834,47 @@ return 0; } +static int fts_backend_xapian_set_path(struct xapian_fts_backend *backend) +{ + struct mail_namespace * ns = backend->backend.ns; + if(ns->alias_for != NULL) + { + if(verbose>0) i_info("FTS Xapian: Switching namespace"); + ns = ns->alias_for; + } + + const char * path = mailbox_list_get_root_forced(ns->list, MAILBOX_LIST_PATH_TYPE_INDEX); + + if(backend->path != NULL) i_free(backend->path); + backend->path = i_strconcat(path, "/" XAPIAN_FILE_PREFIX, NULL); + + if(verbose>0) i_info("FTS Xapian: Index path = %s",backend->path); + + struct stat sb; + if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode))) + { + if (mailbox_list_mkdir_root(backend->backend.ns->list, backend->path, MAILBOX_LIST_PATH_TYPE_INDEX) < 0) + { + i_error("FTS Xapian: can not create '%s'",backend->path); + return -1; + } + } + return 0; +} + static int fts_backend_xapian_set_box(struct xapian_fts_backend *backend, struct mailbox *box) { if (box == NULL) { if(backend->guid != NULL) fts_backend_xapian_unset_box(backend); - if(verbose>0) i_info("FTS Xapian: Box is empty"); + if(verbose>1) i_info("FTS Xapian: Box is empty"); return 0; } const char * mb; fts_mailbox_get_guid(box, &mb ); - if(verbose>1) i_info("FTX Xapian: Set box '%s' (%s)",box->name,mb); + if(verbose>0) i_info("FTS Xapian: Set box '%s' (%s)",box->name,mb); if((mb == NULL) || (strlen(mb)<3)) { @@ -807,11 +890,12 @@ if(backend->guid != NULL) fts_backend_xapian_unset_box(backend); + if(fts_backend_xapian_set_path(backend)<0) return -1; + struct timeval tp; long 
current_time; - gettimeofday(&tp, NULL); - current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; + current_time = fts_backend_xapian_current_time(); backend->commit_updates = 0; backend->commit_time = current_time; @@ -837,7 +921,6 @@ } i_free(t); - /* Performance calculator*/ backend->perf_dt = current_time; backend->perf_uid=0; @@ -917,15 +1000,15 @@ { bool ok=true; - if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_index_hdr"); + if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_index_hdr"); Xapian::WritableDatabase * dbx = backend->dbw; long p = backend->partial; long f = backend->full; - if(data->length()<p) { return true; } + if(data->length()<p) return true; - if(strlen(field)<1) { return true; } + if(strlen(field)<1) return true; long i=0; while((i<HDRS_NB) && (strcmp(field,hdrs_emails[i])!=0)) @@ -1006,7 +1089,7 @@ } catch (std::bad_alloc& ba) { - i_error("FTS Xapian: Memory error '%s'",ba.what()); + i_info("FTS Xapian: Memory too low (hdr) '%s'",ba.what()); ok = false; } } @@ -1020,13 +1103,13 @@ { bool ok = true; - if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_index_text"); + if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_index_text"); Xapian::WritableDatabase * dbx = backend->dbw; long p = backend->partial; long f = backend->full; - if(data->length()<p) { return true; } + if(data->length()<p) return true; XQuerySet * xq = new XQuerySet(); @@ -1134,7 +1217,7 @@ } catch (std::bad_alloc& ba) { - i_error("FTS Xapian: Memory error '%s'",ba.what()); + i_info("FTS Xapian: Memory too low (text) '%s'",ba.what()); ok = false; } } diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-backend-xapian.cpp dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian.cpp --- dovecot-fts-xapian-1.4.7/src/fts-backend-xapian.cpp 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/src/fts-backend-xapian.cpp 2021-04-24 16:27:55.000000000 -0400 @@ -6,7 +6,9 @@ #include "fts-xapian-plugin.h" } #include <dirent.h> +#include <unicode/utypes.h> #include 
<unicode/unistr.h> +#include <unicode/translit.h> #include <sys/time.h> #define XAPIAN_FILE_PREFIX "xapian-indexes" @@ -15,6 +17,7 @@ #define XAPIAN_COMMIT_TIMEOUT 300L #define XAPIAN_WILDCARD "wldcrd" #define XAPIAN_EXPUNGE_HEADER 9 +#define XAPIAN_MIN_RAM 200L #define HDRS_NB 11 static const char * hdrs_emails[HDRS_NB] = { "uid", "subject", "from", "to", "cc", "bcc", "messageid", "listid", "body", "expungeheader", "" }; @@ -25,9 +28,8 @@ struct xapian_fts_backend { struct fts_backend backend; - char * path; + char * path = NULL; long partial,full; - bool attachments; char * guid; char * boxname; @@ -45,12 +47,16 @@ long perf_nb; long perf_uid; long perf_dt; + + long nb_pushes; + long max_push; }; struct xapian_fts_backend_update_context { struct fts_backend_update_context ctx; char * tbi_field=NULL; + bool isattachment=false; bool tbi_isfield; uint32_t tbi_uid=0; }; @@ -69,10 +75,10 @@ static int fts_backend_xapian_init(struct fts_backend *_backend, const char **error_r) { - if(verbose>0) i_info("fts_backend_xapian_init"); + struct xapian_fts_backend *backend = (struct xapian_fts_backend *)_backend; + + if(verbose>0) i_info("fts_backend_xapian_init : %s",_backend->name); - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *)_backend; const char *const *tmp, *env; long len; @@ -83,11 +89,13 @@ backend->path = NULL; backend->old_guid = NULL; backend->old_boxname = NULL; - backend->attachments = false; verbose = 0; backend->partial = 0; backend->full = 0; + backend->nb_pushes=0; + backend->max_push=0; + env = mail_user_plugin_getenv(_backend->ns->user, "fts_xapian"); if (env == NULL) { @@ -114,7 +122,7 @@ } else if (strncmp(*tmp,"attachments=",12)==0) { - if(atol(*tmp + 12)>0) backend->attachments=true; + // Legacy } else { @@ -144,28 +152,16 @@ return -1; } - const char * path = mailbox_list_get_root_forced(_backend->ns->list, MAILBOX_LIST_PATH_TYPE_INDEX); - backend->path = i_strconcat(path, "/" XAPIAN_FILE_PREFIX, NULL); - - struct stat sb; - 
if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode))) - { - if (mailbox_list_mkdir_root(backend->backend.ns->list, backend->path, MAILBOX_LIST_PATH_TYPE_INDEX) < 0) - { - i_error("FTS Xapian: can not create '%s'",backend->path); - return -1; - } - } + if(fts_backend_xapian_set_path(backend)<0) return -1; - if(verbose>0) i_info("FTS Xapian: Starting with partial=%ld full=%ld attachments=%d verbose=%d",backend->partial,backend->full,backend->attachments,verbose); + if(verbose>0) i_info("FTS Xapian: Starting with partial=%ld full=%ld verbose=%d",backend->partial,backend->full,verbose); return 0; } static void fts_backend_xapian_deinit(struct fts_backend *_backend) { - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *)_backend; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *)_backend; if(verbose>0) i_info("FTS Xapian: Deinit %s)",backend->path); @@ -188,8 +184,7 @@ { if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_get_last_uid"); - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *)_backend; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *)_backend; *last_uid_r = 0; @@ -242,18 +237,12 @@ static int fts_backend_xapian_update_deinit(struct fts_backend_update_context *_ctx) { - struct xapian_fts_backend_update_context *ctx = - (struct xapian_fts_backend_update_context *)_ctx; - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *)ctx->ctx.backend; + struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend; if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_deinit (%s)",backend->path); - struct timeval tp; - gettimeofday(&tp, NULL); - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; - - fts_backend_xapian_release(backend,"update_deinit", current_time); + fts_backend_xapian_release(backend,"update_deinit",0); i_free(ctx); @@ -264,10 
+253,8 @@ { if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_set_mailbox"); - struct xapian_fts_backend_update_context *ctx = - (struct xapian_fts_backend_update_context *)_ctx; - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *)ctx->ctx.backend; + struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend; fts_backend_xapian_set_box(backend, box); } @@ -276,10 +263,8 @@ { if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_expunge"); - struct xapian_fts_backend_update_context *ctx = - (struct xapian_fts_backend_update_context *)_ctx; - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *)ctx->ctx.backend; + struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend; if(!fts_backend_xapian_check_access(backend)) { @@ -332,11 +317,9 @@ { if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_update_set_build_key"); - struct xapian_fts_backend_update_context *ctx = - (struct xapian_fts_backend_update_context *)_ctx; + struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx; - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *)ctx->ctx.backend; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *)ctx->ctx.backend; ctx->tbi_isfield=false; ctx->tbi_uid=0; @@ -364,9 +347,7 @@ if((backend->perf_nb - backend->perf_pt)>=200) { backend->perf_pt = backend->perf_nb; - struct timeval tp; - gettimeofday(&tp, NULL); - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - backend->perf_dt; + long dt = fts_backend_xapian_current_time() - backend->perf_dt; double r=0; if(dt>0) { @@ -384,19 +365,27 @@ if(verbose>1) i_info("FTS Xapian: New part (Header=%s,Type=%s,Disposition=%s)",field,type,disposition); // Verify 
content-type - if((type != NULL) && (strncmp(type,"text",4)!=0)) + + if(key->type == FTS_BACKEND_BUILD_KEY_BODY_PART_BINARY) { - if(verbose>0) i_info("FTS Xapian: Skipping part of type '%s'",type); + if(verbose>0) i_info("FTS Xapian: Skipping binary part of type '%s'",type); return FALSE; } - // Verify content-disposition - if((disposition != NULL) && (!backend->attachments) && ((strstr(disposition,"filename=")!=NULL) || (strstr(disposition,"attachment")!=NULL))) + if((type != NULL) && (strncmp(type,"text",4)!=0) && ((disposition==NULL) || ((strstr(disposition,"filename=")==NULL) && (strstr(disposition,"attachment")==NULL)))) { - if(verbose>0) i_info("FTS Xapian: Skipping part of type '%s' and disposition '%s'",type,disposition); + if(verbose>0) i_info("FTS Xapian: Non-binary & non-text part of type '%s'",type); return FALSE; } + // Verify content-disposition + ctx->isattachment=false; + if((disposition != NULL) && ((strstr(disposition,"filename=")!=NULL) || (strstr(disposition,"attachment")!=NULL))) + { + if(verbose>0) i_info("FTS Xapian: Found part as attachment of type '%s' and disposition '%s'",type,disposition); + ctx->isattachment=true; + } + // Fill-in field if(field==NULL) { @@ -422,7 +411,7 @@ } if(i>=HDRS_NB) { - if(verbose>1) i_info("FTS Xapian: Unknown header (indexing) '%s'",ctx->tbi_field); + if(verbose>1) i_info("FTS Xapian: Unknown header '%s' of part",ctx->tbi_field); i_free(ctx->tbi_field); ctx->tbi_field=NULL; return FALSE; @@ -447,10 +436,9 @@ static void fts_backend_xapian_update_unset_build_key(struct fts_backend_update_context *_ctx) { - if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_update_unset_build_key"); + if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_update_unset_build_key"); - struct xapian_fts_backend_update_context *ctx = - (struct xapian_fts_backend_update_context *)_ctx; + struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx; if(ctx->tbi_field!=NULL) { @@ -464,26 +452,32 @@ { 
if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_refresh"); - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *) _backend; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend; - struct timeval tp; - gettimeofday(&tp, NULL); - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; - - fts_backend_xapian_release(backend,"refresh", current_time); + fts_backend_xapian_release(backend,"refresh", 0); return 0; } static int fts_backend_xapian_update_build_more(struct fts_backend_update_context *_ctx, const unsigned char *data, size_t size) { - if(verbose>1) i_info("FTS Xapian: fts_backend_xapian_update_build_more"); + struct xapian_fts_backend_update_context *ctx = (struct xapian_fts_backend_update_context *)_ctx; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) ctx->ctx.backend; - struct xapian_fts_backend_update_context *ctx = - (struct xapian_fts_backend_update_context *)_ctx; - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *) ctx->ctx.backend; + if(verbose>1) + { + if(ctx->isattachment) + { + char * t = i_strdup("NODATA"); + if(data != NULL) { i_free(t); t = i_strndup(data,40); } + i_info("FTS Xapian: Indexing part as attachment (data like '%s')",t); + i_free(t); + } + else + { + i_info("FTS Xapian: Indexing part as text"); + } + } if(ctx->tbi_uid<1) return 0; @@ -499,13 +493,10 @@ return -1; } - struct timeval tp; - gettimeofday(&tp, NULL); - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; - - if(!fts_backend_xapian_test_memory()) + if(!fts_backend_xapian_test_memory(backend,d2.length())) { - fts_backend_xapian_release(backend,"Low memory indexing", current_time); + if(verbose>0) i_info("FTS Xapian: Warning Low memory"); + fts_backend_xapian_release(backend,"Low memory indexing", 0); if(!fts_backend_xapian_check_access(backend)) { i_error("FTS Xapian: Buildmore: Can not open db (2)"); @@ -518,16 +509,41 @@ if(ctx->tbi_isfield) { 
ok=fts_backend_xapian_index_hdr(backend,ctx->tbi_uid,ctx->tbi_field, &d2); + if(!ok) + { + if(verbose>0) i_info("FTS Xapian: Flushing memory and retrying"); + fts_backend_xapian_release(backend,"Flushing memory indexing hdr", 0); + if(fts_backend_xapian_check_access(backend)) + { + ok=fts_backend_xapian_index_hdr(backend,ctx->tbi_uid,ctx->tbi_field, &d2); + } + else + { + i_error("FTS Xapian: Buildmore: Can not open db (3)"); + } + } } else { ok=fts_backend_xapian_index_text(backend,ctx->tbi_uid,ctx->tbi_field, &d2); + if(!ok) + { + if(verbose>0) i_info("FTS Xapian: Flushing memory and retrying"); + fts_backend_xapian_release(backend,"Flushing memory indexing text", 0); + if(fts_backend_xapian_check_access(backend)) + { + ok=fts_backend_xapian_index_text(backend,ctx->tbi_uid,ctx->tbi_field, &d2); + } + else + { + i_error("FTS Xapian: Buildmore: Can not open db (4)"); + } + } } backend->commit_updates++; - gettimeofday(&tp, NULL); - current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; + long current_time = fts_backend_xapian_current_time(); if( (!ok) || (backend->commit_updates>XAPIAN_COMMIT_ENTRIES) || ((current_time - backend->commit_time) > XAPIAN_COMMIT_TIMEOUT*1000) ) { @@ -541,8 +557,7 @@ static int fts_backend_xapian_optimize(struct fts_backend *_backend) { - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *) _backend; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend; i_info("FTS Xapian: fts_backend_xapian_optimize '%s'",backend->path); @@ -580,8 +595,7 @@ { if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_rescan"); - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *) _backend; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend; struct stat sb; if(!( (stat(backend->path, &sb)==0) && S_ISDIR(sb.st_mode))) @@ -631,16 +645,11 @@ { if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_lookup"); - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *) 
_backend; + struct xapian_fts_backend *backend = (struct xapian_fts_backend *) _backend; - if(fts_backend_xapian_set_box(backend, box)<0) - return -1; + if(fts_backend_xapian_set_box(backend, box)<0) return -1; - /* Performance calc */ - struct timeval tp; - gettimeofday(&tp, NULL); - long current_time = tp.tv_sec * 1000 + tp.tv_usec / 1000; + long current_time = fts_backend_xapian_current_time(); Xapian::Database * dbr; @@ -671,6 +680,7 @@ XResultSet * r=fts_backend_xapian_query(dbr,qs); long n=r->size; + if(verbose>0) { i_info("FTS Xapian: QUery '%s' -> %ld results",qs->get_string().c_str(),n); } i_array_init(&(result->definite_uids),r->size); @@ -696,9 +706,7 @@ /* Performance calc */ if(verbose>0) { - gettimeofday(&tp, NULL); - long dt = tp.tv_sec * 1000 + tp.tv_usec / 1000 - current_time; - i_info("FTS Xapian: %ld results in %ld ms",n,dt); + i_info("FTS Xapian: %ld results in %ld ms",n,fts_backend_xapian_current_time() - current_time); } return 0; } @@ -707,10 +715,8 @@ { if(verbose>0) i_info("FTS Xapian: fts_backend_xapian_lookup_multi"); - struct xapian_fts_backend *backend = - (struct xapian_fts_backend *) _backend; - ARRAY(struct fts_result) box_results; + struct fts_result *box_result; int i; @@ -719,12 +725,22 @@ { box_result = array_append_space(&box_results); box_result->box = boxes[i]; - if(fts_backend_xapian_lookup(_backend, boxes[i], args, flags, box_result)<1) return -1; + if(fts_backend_xapian_lookup(_backend, boxes[i], args, flags, box_result)<0) + { + void* p=&box_results; + p_free(result->pool, p); + return -1; + } } + + array_append_zero(&box_results); + result->box_results = array_idx_modifiable(&box_results, 0); + return 0; } -struct fts_backend fts_backend_xapian = { +struct fts_backend fts_backend_xapian = +{ .name = "xapian", .flags = FTS_BACKEND_FLAG_BUILD_FULL_WORDS, .v = { @@ -748,4 +764,3 @@ NULL } }; - diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.c dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.c --- 
dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.c 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.c 2021-04-24 16:27:55.000000000 -0400 @@ -7,13 +7,11 @@ void fts_xapian_plugin_init(struct module *module ATTR_UNUSED) { - //i_warning("fts_xapian_plugin_init"); fts_backend_register(&fts_backend_xapian); } void fts_xapian_plugin_deinit(void) { - //i_warning("fts_xapian_plugin_deinit"); fts_backend_unregister(fts_backend_xapian.name); } diff -Nru -w dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.h dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.h --- dovecot-fts-xapian-1.4.7/src/fts-xapian-plugin.h 2021-01-31 14:06:29.000000000 -0500 +++ dovecot-fts-xapian-1.4.9a/src/fts-xapian-plugin.h 2021-04-24 16:27:55.000000000 -0400 @@ -5,13 +5,12 @@ #define FTS_XAPIAN_PLUGIN_H #include "lib.h" -#include "mail-storage-private.h" -#include "mailbox-list-private.h" -#include "mail-search.h" +#include "fts-api-private.h" #include "fts-api.h" -#include "module-context.h" +#include "mail-search.h" +#include "mail-storage-private.h" #include "mail-user.h" -#include "fts-api-private.h" +#include "module-context.h" #include "restrict-process-size.h" extern const char *fts_xapian_plugin_dependencies[];