On Mon, 24 Jan 2022 at 22:53, <[email protected]> wrote:
>
> This is an automated email from the ASF dual-hosted git repository.
>
> humbedooh pushed a commit to branch master
> in repository https://gitbox.apache.org/repos/asf/incubator-ponymail-foal.git
>
> commit bbd540b58c7fb4b4ca2974675182016c1cf135b4
> Author: Daniel Gruno <[email protected]>
> AuthorDate: Mon Jan 24 23:52:25 2022 +0100
>
>     Add plain html indexer for easing search engine indexing by certain engines

This new plain HTML endpoint needs documenting.

> ---
>  server/endpoints/plain.py | 167 ++++++++++++++++++++++++++++++++++++++++++++++
>  1 file changed, 167 insertions(+)
>
> diff --git a/server/endpoints/plain.py b/server/endpoints/plain.py
> new file mode 100644
> index 0000000..14d2f9b
> --- /dev/null
> +++ b/server/endpoints/plain.py
> @@ -0,0 +1,167 @@
> +#!/usr/bin/env python3
> +# -*- coding: utf-8 -*-
> +# Licensed to the Apache Software Foundation (ASF) under one or more
> +# contributor license agreements.  See the NOTICE file distributed with
> +# this work for additional information regarding copyright ownership.
> +# The ASF licenses this file to You under the Apache License, Version 2.0
> +# (the "License"); you may not use this file except in compliance with
> +# the License.  You may obtain a copy of the License at
> +#
> +#     http://www.apache.org/licenses/LICENSE-2.0
> +#
> +# Unless required by applicable law or agreed to in writing, software
> +# distributed under the License is distributed on an "AS IS" BASIS,
> +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> +# See the License for the specific language governing permissions and
> +# limitations under the License.
> +
> +"""Plain text endpoint for enabling some search engines to index mail 
> archives"""
> +
> +import plugins.server
> +import plugins.session
> +import plugins.messages
> +import aiohttp.web
> +import html
> +
> +
> +def count_replies(thread):
> +    """Simple function for counting how many replies an email thread has"""
> +    count = 0
> +    for child in thread["children"]:
> +        count += count_replies(child) + 1
> +    return count
> +
> +
> +async def process(
> +    server: plugins.server.BaseServer,
> +    session: plugins.session.SessionObject,
> +    indata: dict,
> +) -> aiohttp.web.Response:
> +
> +    output = ""
> +    canonical_link = None
> +    title = "Apache Pony Mail"
> +
> +    # Has a list or thread id been provided?
> +    list_id = html.escape(indata.get("list", ""))
> +    thread_id = html.escape(indata.get("thread", ""))
> +
> +    # Show an email (or thread)
> +    if thread_id:
> +        canonical_link = f"""/thread.html/{thread_id}"""
> +        email = await plugins.messages.get_email(session, 
> permalink=thread_id)
> +        if email:
> +            listname = html.escape(
> +                "@".join(email.get("list_raw", "").strip("<>").split(".", 1))
> +            )
> +            date = html.escape(email.get("date", ""))
> +            author = html.escape(email.get("from"))
> +            output += f"""Posted to <a 
> href="/list.html?{listname}">{listname}</a> by {author} on {date} UTC<br/>"""
> +            title = html.escape(email.get("subject", ""))
> +            body = html.escape(email.get("body", ""))
> +            thread, emails, _pdocs = await plugins.messages.fetch_children(
> +                session, email
> +            )
> +            output += 
> f"""<h1>{email["subject"]}</h1><pre>{body}</pre><hr/>\n"""
> +            for tid, email in _pdocs.items():
> +                body = html.escape(email.get("body", ""))
> +                author = html.escape(email.get("from"))
> +                output += f"""<h2>{email["subject"]}</h2>\n<b>Posted by 
> {author}.</b><hr/><pre>{body}</pre><hr/>\n"""
> +    # Show a list
> +    elif list_id:
> +        # Make sure we can actually index this list
> +        can_view = False
> +        if list_id in server.data.lists:
> +            if not server.data.lists[list_id].get("private", True):
> +                can_view = True
> +        if can_view:
> +            l, d = list_id.split("@", 1)
> +            month = indata.get("date")
> +            mydata = {
> +                "list": l,
> +                "domain": d,
> +            }
> +
> +            # Do we have a specific month to show?
> +            if month:
> +                title = html.escape(f"{list_id}, {month}")
> +                mydata["date"] = month
> +                query_defuzzed = plugins.defuzzer.defuzz(mydata)
> +                canonical_link = f"/list.html?{list_id}:{month}"
> +                results = await plugins.messages.query(
> +                    session,
> +                    query_defuzzed,
> +                    query_limit=server.config.database.max_hits,
> +                )
> +                threads = plugins.messages.ThreadConstructor(results)
> +                thread_struct, authors = await 
> server.runners.run(threads.construct)
> +                for (
> +                    thread
> +                ) in (
> +                    thread_struct
> +                ):  # Make a list item for each thread (not for each email)
> +                    author = "Unknown"
> +                    date = "Unknown"
> +                    count = count_replies(thread)
> +                    # Find the email in the results pile and grab author and 
> date
> +                    for k in results:
> +                        if k["id"] == thread["tid"]:
> +                            author = html.escape(k["from"])
> +                            date = html.escape(k["date"])
> +                            break
> +                    output += f"""- <a 
> href="?thread={thread["tid"]}">{thread["subject"]}</a> - posted by {author} 
> on {date} UTC, {count} replies.<br/>\n"""
> +            # No month specified, which means just show all months with 
> email in 'em
> +            else:
> +                title = list_id
> +                canonical_link = f"/list.html?{list_id}"
> +                output = f"""<link rel="canonical" 
> href="/list.html?{list_id}" />\n"""
> +                query_defuzzed_nodate = plugins.defuzzer.defuzz(mydata, 
> nodate=True)
> +                (
> +                    oldest,
> +                    youngest,
> +                    active_months,
> +                ) = await plugins.messages.get_activity_span(
> +                    session, query_defuzzed_nodate
> +                )
> +                for month, activity in active_months.items():
> +                    output += (
> +                        f"""<a 
> href="?list={list_id}&date={month}">{month}</a><br/>"""
> +                    )
> +    else:  # Just list all lists?
> +        canonical_link = "/"
> +        output = f"""<link rel="canonical" href="/" />\n"""
> +        # Sort by domain, then by list name
> +        for ml in sorted(server.data.lists.keys(), key=lambda x: 
> x.split("@", 1)[-1] + "-" + x.split("@", 1)[0]):
> +            entry = server.data.lists[ml]
> +            if "@" in ml:
> +                if not entry.get("private", True):  # Only index public lists
> +                    output += f"<a href='?list={ml}'>{ml}</a><br/>\n"
> +
> +    if output and canonical_link:
> +        output_interpolated = f"""
> +        <html>
> +            <head>
> +                <link rel="canonical" href="{canonical_link}" />
> +                <title>{title}</title>
> +            </head>
> +            <body>
> +                <i>You are viewing a plain text version of this content. The 
> canonical link for it is <a href="{canonical_link}">here</a>.</i><hr/>
> +                {output}
> +            </body>
> +        </html>
> +        """
> +        return aiohttp.web.Response(
> +            headers={"Content-Type": "text/html; charset=utf-8"},
> +            status=200,
> +            text=output_interpolated,
> +        )
> +    else:
> +        return aiohttp.web.Response(
> +            headers={"Content-Type": "text/plain"},
> +            status=200,
> +            text="No data",
> +        )
> +
> +
> +def register(server: plugins.server.BaseServer):
> +    return plugins.server.Endpoint(process)

Reply via email to