This is an automated email from the ASF dual-hosted git repository.
sebb pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
commit 8c48b5b70c4dc5d0831bfde9e1e8e53129c6aa34
Author: Sebb <[email protected]>
AuthorDate: Sun Dec 12 12:26:01 2021 +0000
mbox.py is inefficient for large mailboxes
This fixes #172
---
server/endpoints/mbox.py | 15 +++++++--------
server/plugins/messages.py | 40 ++++++++++++++++++++++++++++++++--------
2 files changed, 39 insertions(+), 16 deletions(-)
diff --git a/server/endpoints/mbox.py b/server/endpoints/mbox.py
index fb6ee2b..8b34362 100644
--- a/server/endpoints/mbox.py
+++ b/server/endpoints/mbox.py
@@ -86,13 +86,6 @@ async def process(
return aiohttp.web.Response(headers={"content-type": "text/plain",}, status=400, text=str(ve))
except AssertionError as ae: # If defuzzer encounters internal errors, it will throw an AssertionError
return aiohttp.web.Response(headers={"content-type": "text/plain",}, status=500, text=str(ae))
- results = await plugins.messages.query(
- session,
- query_defuzzed,
- query_limit=server.config.database.max_hits,
- metadata_only=True,
- epoch_order="asc"
- )
dlstem = f"{lid}_{domain}"
if yyyymm:
@@ -109,7 +102,13 @@ async def process(
response = aiohttp.web.StreamResponse(status=200, headers=headers)
response.enable_chunked_encoding()
await response.prepare(request)
- for email in results:
+
+ async for email in plugins.messages.query_each(
+ session,
+ query_defuzzed,
+ metadata_only=True,
+ epoch_order="asc"
+ ):
mboxrd_source = await convert_source(session, email)
# Ensure each non-empty source ends with a blank line
if not mboxrd_source.endswith("\n\n"):
diff --git a/server/plugins/messages.py b/server/plugins/messages.py
index f6abcba..47ca7d7 100644
--- a/server/plugins/messages.py
+++ b/server/plugins/messages.py
@@ -316,10 +316,9 @@ async def get_source(session: plugins.session.SessionObject, permalink: str = No
return None
-async def query(
+async def query_each(
session: plugins.session.SessionObject,
query_defuzzed,
- query_limit=10000,
hide_deleted=True,
metadata_only=False,
epoch_order="desc",
@@ -328,9 +327,8 @@ async def query(
"""
Advanced query and grab for stats.py
Also called by mbox.py (using metadata_only=True)
+ Yields results singly
"""
- docs = []
- hits = 0
assert session.database, DATABASE_NOT_CONNECTED
preserve_order = True if epoch_order == "asc" else False
es_query = {
@@ -378,10 +376,36 @@ async def query(
for hdr in MUST_HAVE:
if not hdr in source_fields and hdr in doc:
del doc[hdr]
- docs.append(doc)
- hits += 1
- if hits > query_limit:
- break
+ yield doc
+
+
+async def query(
+ session: plugins.session.SessionObject,
+ query_defuzzed,
+ query_limit=10000,
+ hide_deleted=True,
+ metadata_only=False,
+ epoch_order="desc",
+ source_fields=None
+):
+ """
+ Advanced query and grab for stats.py
+ Collects the docs yielded by query_each() into a list and returns it.
+ hide_deleted, metadata_only, epoch_order and source_fields are passed
+ through to query_each() unchanged; query_limit is applied here only.
+ Callers that want results one at a time can iterate query_each() directly.
+ NOTE(review): the loop stops only once hits exceeds query_limit, so the
+ returned list can hold up to query_limit + 1 docs - confirm this is intended.
+ """
+ docs = []
+ hits = 0
+ # query_limit is not forwarded to query_each(); the cap is enforced by this loop
+ async for doc in query_each(
+ session,
+ query_defuzzed,
+ hide_deleted=hide_deleted,
+ metadata_only=metadata_only,
+ epoch_order=epoch_order,
+ source_fields=source_fields
+ ):
+ docs.append(doc)
+ hits += 1
+ if hits > query_limit:
+ break
return docs