On Sun, 17 Oct 2021 at 23:29, Daniel Gruno <[email protected]> wrote: > > On 18/10/2021 00.25, sebb wrote: > > On Sun, 17 Oct 2021 at 23:19, <[email protected]> wrote: > >> > >> This is an automated email from the ASF dual-hosted git repository. > >> > >> humbedooh pushed a commit to branch master > >> in repository > >> https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git > >> > >> > >> The following commit(s) were added to refs/heads/master by this push: > >> new d843003 +1 will suffice > >> d843003 is described below > >> > >> commit d8430036d92e8a89c693277a7f5c5c4c262f352c > >> Author: Daniel Gruno <[email protected]> > >> AuthorDate: Mon Oct 18 00:19:40 2021 +0200 > >> > >> +1 will suffice > >> --- > >> tools/archiver.py | 2 +- > >> tools/migrate.py | 3 ++- > >> 2 files changed, 3 insertions(+), 2 deletions(-) > >> > >> diff --git a/tools/archiver.py b/tools/archiver.py > >> index f14d7f6..93a29a8 100755 > >> --- a/tools/archiver.py > >> +++ b/tools/archiver.py > >> @@ -588,7 +588,7 @@ class Archiver(object): # N.B. Also used by > >> import-mbox.py > >> > >> notes.append(["ARCHIVE: Email archived as %s at %u" % > >> (document_id, time.time())]) > >> body_unflowed = body.unflow() if body else "" > >> - body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+10] # +10 > >> so that we can tell if larger than std short body. > >> + body_shortened = body_unflowed[:SHORT_BODY_MAX_LEN+1] # +1 > >> so that we can tell if larger than std short body. > >> > >> output_json = { > >> "from_raw": msg_metadata["from"], > >> diff --git a/tools/migrate.py b/tools/migrate.py > >> index 2493465..c46b8ef 100644 > >> --- a/tools/migrate.py > >> +++ b/tools/migrate.py > >> @@ -201,7 +201,8 @@ def process_document(old_es, doc, old_dbname, > >> dbname_source, dbname_mbox, do_dki > >> doc["_source"]["dbid"] = hashlib.sha3_256(source_text).hexdigest() > >> > >> # Add in shortened body for search aggs > >> - doc["_source"]["body_short"] = > >> doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+10] > >> + # We add +1 to know whether to use ellipsis in reports. > >> + doc["_source"]["body_short"] = > >> doc["_source"]["body"][:archiver.SHORT_BODY_MAX_LEN+1] > > > > Why +1 here? > > Cosmetic reasons, we need to know whether to add '...' to the body when > potentially shortening it. If we cap at 200, we won't know if it's >200, > so we cap at 200+1. Alternatively, we would add a new field that had > either a bool (shortened or not) or add a body_length field that we > could work with, but that seems a tad overkill. HTH
Yes, I know. But a person reading the code later might not, as this is not documented. > > > >> # Add in gravatar > >> header_from = doc["_source"]["from"] >
