(petri) 01/01: Testing migration process on petri-site

dfoulks Fri, 17 May 2024 09:00:58 -0700

This is an automated email from the ASF dual-hosted git repository.

dfoulks pushed a commit to branch dfoulks/pelican-gha
in repository https://gitbox.apache.org/repos/asf/petri.git


commit 2812e6a8972d0bb97b5e9658276406f76ca71021
Author: Drew <[email protected]>
AuthorDate: Fri May 17 12:00:42 2024 -0400

    Testing migration process on petri-site
---
 .github/workflows/build-pelican.yml |  44 +++++++
 pelicanconf.py                      |  49 ++++++++
 theme/plugins/gfm.py                | 237 ++++++++++++++++++++++++++++++++++++
 3 files changed, 330 insertions(+)

diff --git a/.github/workflows/build-pelican.yml 
b/.github/workflows/build-pelican.yml
new file mode 100644
index 0000000..ca20af8
--- /dev/null
+++ b/.github/workflows/build-pelican.yml
@@ -0,0 +1,44 @@
+# Workflow: build the Pelican site and open a pull request that carries the
+# generated output/ directory.
+name: Build a Pelican Website
+on:
+  push:
+    # NOTE(review): the commit notification says this was pushed to branch
+    # "dfoulks/pelican-gha" (hyphen), but the trigger here and the checkout
+    # ref below use "dfoulks/pelican_gha" (underscore) - confirm which is
+    # intended.
+    branches: [ "dfoulks/pelican_gha" ]
+    # Pushes that only touch output/ do not start a build.
+    paths-ignore:
+      - 'output/**'
+  # Allow the build to be started manually as well.
+  workflow_dispatch:
+env:
+  # Read at import time by theme/plugins/gfm.py to locate libcmark-gfm.
+  # NOTE(review): this path (and the working-directory further down) names
+  # "infrastructure-website", while this workflow lives in the petri
+  # repository - confirm the path exists on the runner.
+  LIBCMARKDIR: 
/home/runner/work/infrastructure-website/infrastructure-website/cmark-gfm-0.28.3.gfm.12/lib
+jobs:
+  build-pelican:
+    runs-on: ubuntu-latest
+    # A failure of this job does not fail the workflow run.
+    continue-on-error: true
+    steps:
+      - uses: actions/checkout@v4
+        with:
+          # NOTE(review): repository is an empty string - presumably a
+          # placeholder left from the template; confirm.
+          repository: ''
+          ref: 'dfoulks/pelican_gha'
+      - name: Install Pelican
+        run: pip3 install pelican markdown ghp-import bs4
+      # Optionally, if your website uses the gfm plugin uncomment the GFM block
+      #########################
+      # START BUILD GFM BLOCK #
+      #########################
+      - name: fetch libcmark-gfm.so buildscript
+        run: wget 
https://raw.githubusercontent.com/apache/infrastructure-pelican/master/bin/build-cmark.sh
+      - name: build libcmark-gfm.so
+        run: /bin/bash ./build-cmark.sh
+      #######################
+      # END BUILD GFM BLOCK #
+      #######################
+      - name: Generate website from markdown
+        run: python3 -m pelican content -o output
+        working-directory: 
'/home/runner/work/infrastructure-website/infrastructure-website'
+      - name: Open a PR against the staging branch
+        # NOTE(review): "[email protected]" looks like the mail archive's
+        # address obfuscation applied to "create-pull-request@<version>";
+        # the real version tag is not recoverable from this page.
+        uses: peter-evans/[email protected]
+        with:
+          token: ${{ secrets.GITHUB_TOKEN }}
+          commit-message: GitHub Actions Generated Pelican Build
+          title: Generated Pelican Output
+          body: output generated
+          # Only the generated site is added to the pull request.
+          add-paths: |
+            output/
+          base: dfoulks/gha-site
diff --git a/pelicanconf.py b/pelicanconf.py
new file mode 100644
index 0000000..567b244
--- /dev/null
+++ b/pelicanconf.py
@@ -0,0 +1,49 @@
+
+# Pelican settings for the Apache Petri website.
+#
+# Basic information about the site.
+SITENAME = 'Apache Petri'
+SITEDESC = 'Assists external project communities interested in becoming an 
Apache project learn how The ASF works and its views on how to build a healthy 
community'
+SITEDOMAIN = 'petri.apache.org'
+SITEURL = 'https://petri.apache.org'
+SITELOGO = 'https://petri.apache.org/images/logo.png'
+SITEREPOSITORY = 'https://github.com/apache/petri/blob/master/content/'
+# NOTE(review): the year is hard-coded and will need updating by hand.
+CURRENTYEAR = 2024
+TRADEMARKS = 'Apache, the Apache feather logo, and Petri are trademarks or 
registered trademarks'
+TIMEZONE = 'UTC'
+# Theme includes templates and possibly static files
+THEME = 'theme/apache'
+# Specify location of plugins, and which to use
+PLUGIN_PATHS = [ 'theme/plugins',  ]
+# NOTE(review): 'gfm' is theme/plugins/gfm.py from this same commit; 'toc2'
+# is not added by this commit - confirm it already exists under PLUGIN_PATHS.
+PLUGINS = [ 'toc2', 'gfm',  ]
+# All content is located at '.' (aka content/ )
+PAGE_PATHS = [ '.' ]
+STATIC_PATHS = [ '.',  ]
+# Where to place/link generated pages
+
+# Capture the source path without its extension as "path_no_ext" ...
+PATH_METADATA = '(?P<path_no_ext>.*)\\..*'
+
+# ... and reuse it so each page is written next to where its source lives.
+PAGE_SAVE_AS = '{path_no_ext}.html'
+# Don't try to translate
+PAGE_TRANSLATION_ID = None
+# Disable unused Pelican features
+# N.B. These features are currently unsupported, see 
https://github.com/apache/infrastructure-pelican/issues/49
+FEED_ALL_ATOM = None
+INDEX_SAVE_AS = ''
+TAGS_SAVE_AS = ''
+CATEGORIES_SAVE_AS = ''
+AUTHORS_SAVE_AS = ''
+ARCHIVES_SAVE_AS = ''
+# Disable articles by pointing to a (should-be-absent) subdir
+ARTICLE_PATHS = [ 'blog' ]
+# needed to create blogs page
+# NOTE(review): this comment and the one above ARTICLE_PATHS disagree about
+# whether a blog/ directory is expected to exist - confirm.
+ARTICLE_URL = 'blog/{slug}.html'
+ARTICLE_SAVE_AS = 'blog/{slug}.html'
+# Disable all processing of .html files
+READERS = { 'html': None, }
+
+
+
+
+
+
+
+
diff --git a/theme/plugins/gfm.py b/theme/plugins/gfm.py
new file mode 100644
index 0000000..597b831
--- /dev/null
+++ b/theme/plugins/gfm.py
@@ -0,0 +1,237 @@
+#!/usr/bin/python
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#
+# gfm_reader.py -- GitHub-Flavored Markdown reader for Pelican
+#
+
+import sys
+import os.path
+import ctypes
+import re
+import platform
+
+import pelican.utils
+import pelican.plugins.signals
+import pelican.readers
+
+# Directory holding the cmark-gfm shared libraries. LIBCMARKDIR must be set
+# in the environment; if it is missing this lookup raises KeyError when the
+# plugin is imported.
+_LIBDIR = os.environ['LIBCMARKDIR']
+# Shared-library suffix: .dylib on macOS, .so everywhere else.
+_LIBEXT = '.dylib' if platform.system() == 'Darwin' else '.so'
+_LIBCMARK = f'libcmark-gfm{_LIBEXT}'
+try:
+    cmark = ctypes.CDLL(os.path.join(_LIBDIR, _LIBCMARK))
+except OSError as e:
+    # Chain the loader error so the traceback shows why the load failed.
+    raise ImportError(
+        f'{_LIBCMARK} not found. See build-cmark.sh. Error:\n{e}') from e
+
+# Newer releases have different naming for this library. Try it first.
+try:
+    cmark_ext = ctypes.CDLL(
+        os.path.join(_LIBDIR, f'libcmark-gfm-extensions{_LIBEXT}'))
+    ENSURE_REGISTERED = 'cmark_gfm_core_extensions_ensure_registered'
+except OSError:
+    # Try the older name for the library.
+    try:
+        cmark_ext = ctypes.CDLL(
+            os.path.join(_LIBDIR, f'libcmark-gfmextensions{_LIBEXT}'))
+        ENSURE_REGISTERED = 'core_extensions_ensure_registered'
+    except OSError as e:
+        # Neither library name could be loaded; keep the loader error as
+        # the cause instead of discarding it.
+        raise ImportError(
+            'GFM Extensions not found. See build-cmark.sh') from e
+
+
+# Use ctypes to access the functions in libcmark-gfm. Pointers are declared
+# as c_void_p so they are passed through as opaque handles.
+F_cmark_parser_new = cmark.cmark_parser_new
+F_cmark_parser_new.restype = ctypes.c_void_p
+F_cmark_parser_new.argtypes = (ctypes.c_int,)
+
+F_cmark_parser_feed = cmark.cmark_parser_feed
+F_cmark_parser_feed.restype = None
+F_cmark_parser_feed.argtypes = (
+    ctypes.c_void_p, ctypes.c_char_p, ctypes.c_size_t)
+
+F_cmark_parser_finish = cmark.cmark_parser_finish
+F_cmark_parser_finish.restype = ctypes.c_void_p
+F_cmark_parser_finish.argtypes = (ctypes.c_void_p,)
+
+F_cmark_parser_attach_syntax_extension = (
+    cmark.cmark_parser_attach_syntax_extension)
+F_cmark_parser_attach_syntax_extension.restype = ctypes.c_int
+F_cmark_parser_attach_syntax_extension.argtypes = (
+    ctypes.c_void_p, ctypes.c_void_p)
+
+F_cmark_parser_get_syntax_extensions = cmark.cmark_parser_get_syntax_extensions
+F_cmark_parser_get_syntax_extensions.restype = ctypes.c_void_p
+F_cmark_parser_get_syntax_extensions.argtypes = (ctypes.c_void_p,)
+
+F_cmark_parser_free = cmark.cmark_parser_free
+F_cmark_parser_free.restype = None
+F_cmark_parser_free.argtypes = (ctypes.c_void_p,)
+
+F_cmark_node_free = cmark.cmark_node_free
+F_cmark_node_free.restype = None
+F_cmark_node_free.argtypes = (ctypes.c_void_p,)
+
+F_cmark_find_syntax_extension = cmark.cmark_find_syntax_extension
+F_cmark_find_syntax_extension.restype = ctypes.c_void_p
+F_cmark_find_syntax_extension.argtypes = (ctypes.c_char_p,)
+
+# With restype c_char_p, ctypes hands back the rendered HTML as a Python
+# bytes object (or None for a NULL pointer).
+F_cmark_render_html = cmark.cmark_render_html
+F_cmark_render_html.restype = ctypes.c_char_p
+F_cmark_render_html.argtypes = (ctypes.c_void_p, ctypes.c_int, ctypes.c_void_p)
+
+
+# Set up the libcmark-gfm library and its extensions
+F_register = getattr(cmark_ext, ENSURE_REGISTERED)
+F_register.restype = None
+F_register.argtypes = ()
+F_register()
+
+### technically, maybe install an atexit() to release the plugins
+
+# Options for the GFM rendering call
+### this could be moved into SETTINGS or somesuch, but meh. not needed now.
+OPTS = 0
+
+# The GFM extensions that we want to use
+EXTENSIONS = (
+    'autolink',
+    'table',
+    'strikethrough',
+    'tagfilter',
+)
+
+
+class GFMReader(pelican.readers.BaseReader):
+    """GFM-flavored Reader for the Pelican system.
+
+    Pelican looks for all subclasses of BaseReader, and automatically
+    registers them for the file extensions listed below. Thus, nothing
+    further is required by users of this Reader.
+    """
+
+    # Declared after the docstring: a string that follows another statement
+    # is not picked up as the class docstring.
+    enabled = True
+
+    # NOTE: the builtin MarkdownReader must be disabled. Otherwise, it will be
+    #       non-deterministic which Reader will be used for these files.
+    file_extensions = ['md', 'markdown', 'mkd', 'mdown']
+
+    # Metadata is specified as a single, colon-separated line, such as:
+    #
+    # Title: this is the title
+    #
+    # Note: name starts in column 0, no whitespace before colon, will be
+    #       made lower-case, and value will be stripped
+    #
+    # The name is letters and underscores only. The former class [A-za-z]
+    # also matched the characters between 'Z' and 'a' ([ \ ] ^ _ `), so a
+    # leading Markdown link definition such as "[ref]: http://..." was
+    # consumed as metadata. The underscore is kept on purpose.
+    RE_METADATA = re.compile(r'^([A-Za-z_]+): (.*)$')
+
+    def read_source(self, source_path):
+        """Read metadata and content from the source.
+
+        Returns a ``(text, metadata)`` tuple. ``metadata`` maps lower-cased
+        header names to their stripped values (``date`` is converted with
+        pelican.utils.get_date) and always carries a path-derived ``slug``.
+        ``text`` is the source from the first content line onwards.
+        """
+
+        # Prepare the "slug", which is the target file name. It will be the
+        # same as the source file, minus the leading
+        # ".../content/(articles|pages)" and with the extension removed
+        # (Pelican will add .html)
+        relpath = os.path.relpath(source_path, self.settings['PATH'])
+        parts = relpath.split(os.sep)
+        parts[-1] = os.path.splitext(parts[-1])[0]  # split off ext, keep base
+        slug = os.sep.join(parts[1:])
+
+        metadata = {
+            'slug': slug,
+        }
+        # Fetch the source content, with a few appropriate tweaks
+        with pelican.utils.pelican_open(source_path) as text:
+
+            # Extract the metadata from the header of the text
+            lines = text.splitlines()
+            # Pre-set so the slice below works for an empty file. See
+            # https://github.com/apache/infrastructure-pelican/issues/70
+            i = 0
+            for i, line in enumerate(lines):
+                match = GFMReader.RE_METADATA.match(line)
+                if match:
+                    name = match.group(1).strip().lower()
+                    if name == 'slug':
+                        # The slug always comes from the file path. The old
+                        # code stored an unassigned (or stale) value here.
+                        continue
+                    value = match.group(2).strip()
+                    if name == 'date':
+                        value = pelican.utils.get_date(value)
+                    metadata[name] = value
+                elif line.strip():
+                    # reached actual content (blank lines are skipped)
+                    break
+
+            # Redo the slug for articles.
+            # depending on pelicanconf.py this will change the output filename
+            if parts[0] == 'articles' and 'title' in metadata:
+                metadata['slug'] = pelican.utils.slugify(
+                    metadata['title'],
+                    self.settings.get('SLUG_SUBSTITUTIONS', ()))
+
+            # Reassemble content, minus the metadata. If no content line was
+            # found, i is the last index, so that final line is kept.
+            text = '\n'.join(lines[i:])
+
+            return text, metadata
+
+    def read(self, source_path):
+        """Read metadata and content then render into HTML.
+
+        Returns a ``(content, metadata)`` tuple where ``content`` is the
+        rendered HTML as str. Raises AssertionError when the source text,
+        the metadata or the rendered output is empty.
+        """
+
+        # read metadata and markdown content
+        text, metadata = self.read_source(source_path)
+        assert text, 'Text must not be empty'
+        assert metadata, 'Metadata must not be empty'
+        # Render the markdown into HTML. render() takes and returns UTF-8
+        # bytes; this module already requires Python 3 (it uses f-strings),
+        # so the old Python 2 branch was unreachable.
+        content = self.render(text.encode('utf-8')).decode('utf-8')
+        assert content, 'Did not expect content to be empty'
+
+        return content, metadata
+
+    def render(self, text):
+        """Use cmark-gfm to render the Markdown into an HTML fragment.
+
+        ``text`` is the Markdown source as UTF-8 bytes; the return value is
+        whatever cmark_render_html yields through ctypes (bytes).
+        """
+
+        parser = F_cmark_parser_new(OPTS)
+        assert parser, 'Failed to initialise parser'
+        doc = None
+        try:
+            for name in EXTENSIONS:
+                ext = F_cmark_find_syntax_extension(name.encode('utf-8'))
+                assert ext, f'Failed to find the {name!r} extension'
+                rv = F_cmark_parser_attach_syntax_extension(parser, ext)
+                assert rv, f'Failed to attach the {name!r} extension'
+            exts = F_cmark_parser_get_syntax_extensions(parser)
+            F_cmark_parser_feed(parser, text, len(text))
+            doc = F_cmark_parser_finish(parser)
+            assert doc, 'Did not expect rendered output to be empty'
+
+            return F_cmark_render_html(doc, OPTS, exts)
+        finally:
+            # Release the native objects even when a step above fails.
+            F_cmark_parser_free(parser)
+            if doc:
+                F_cmark_node_free(doc)
+
+
+def add_readers(readers):
+    """Map the 'md' extension to GFMReader in the given readers object."""
+    registry = readers.reader_classes
+    registry['md'] = GFMReader
+
+
+def register():
+    """Plugin entry point: connect add_readers to the readers_init signal."""
+    readers_init = pelican.plugins.signals.readers_init
+    readers_init.connect(add_readers)

(petri) 01/01: Testing migration process on petri-site

Reply via email to