Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-html5-parser for 
openSUSE:Factory checked in at 2023-06-23 21:52:42
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html5-parser (Old)
 and      /work/SRC/openSUSE:Factory/.python-html5-parser.new.15902 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Package is "python-html5-parser"

Fri Jun 23 21:52:42 2023 rev:13 rq:1094741 version:0.4.11

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html5-parser/python-html5-parser.changes  
2021-12-23 17:54:15.747738148 +0100
+++ 
/work/SRC/openSUSE:Factory/.python-html5-parser.new.15902/python-html5-parser.changes
       2023-06-23 21:52:45.266666455 +0200
@@ -1,0 +2,9 @@
+Fri Jun 23 04:59:34 UTC 2023 - ecsos <ec...@opensuse.org>
+
+- Update to 0.4.11
+  No changelog from upstream.
+  See instead here:
+  
https://github.com/kovidgoyal/html5-parser/compare/v0.4.10...v0.4.11?diff=unified&name=v0.4.11
+- Add %{?sle15_python_module_pythons}
+
+-------------------------------------------------------------------

Old:
----
  python-html5-parser-0.4.10.tar.gz

New:
----
  python-html5-parser-0.4.11.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-html5-parser.spec ++++++
--- /var/tmp/diff_new_pack.GOyuNz/_old  2023-06-23 21:52:45.834669710 +0200
+++ /var/tmp/diff_new_pack.GOyuNz/_new  2023-06-23 21:52:45.838669732 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package python-html5-parser
 #
-# Copyright (c) 2021 SUSE LLC
+# Copyright (c) 2023 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -16,10 +16,9 @@
 #
 
 
-%{?!python_module:%define python_module() python-%{**} python3-%{**}}
-%define skip_python2 1
+%{?sle15_python_module_pythons}
 Name:           python-html5-parser
-Version:        0.4.10
+Version:        0.4.11
 Release:        0
 Summary:        C based HTML 5 parsing for Python
 License:        Apache-2.0

++++++ python-html5-parser-0.4.10.tar.gz -> python-html5-parser-0.4.11.tar.gz 
++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/.github/workflows/ci.py 
new/html5-parser-0.4.11/.github/workflows/ci.py
--- old/html5-parser-0.4.10/.github/workflows/ci.py     2021-09-22 
09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/.github/workflows/ci.py     2023-04-12 
07:07:46.000000000 +0200
@@ -45,7 +45,7 @@
     elif which == 'test':
         builder = os.environ['BUILDER']
         run(sys.executable, builder, 'test')
-        if builder == 'build.py':
+        if builder == 'unix_build.py':
             run(sys.executable, builder, 'leak')
     else:
         raise SystemExit('Unknown action:', which)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/.github/workflows/ci.yml 
new/html5-parser-0.4.11/.github/workflows/ci.yml
--- old/html5-parser-0.4.10/.github/workflows/ci.yml    2021-09-22 
09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/.github/workflows/ci.yml    2023-04-12 
07:07:46.000000000 +0200
@@ -15,15 +15,15 @@
         strategy:
             matrix:
                 include:
-                    - { pyver: 2.7, builder: build.py, os: ubuntu-latest, cc: 
gcc }
-                    - { pyver: 2.7, builder: build.py, os: ubuntu-latest, cc: 
clang }
-                    - { pyver: 3.6, builder: build.py, os: ubuntu-latest, cc: 
gcc }
-                    - { pyver: 3.6, builder: build.py, os: ubuntu-latest, cc: 
clang }
-                    - { pyver: 3.8, builder: setup.py, os: ubuntu-latest, cc: 
gcc }
+                    - { pyver: "2.7", builder: unix_build.py, os: 
ubuntu-latest, cc: gcc }
+                    - { pyver: "2.7", builder: unix_build.py, os: 
ubuntu-latest, cc: clang }
+                    - { pyver: "3.8", builder: unix_build.py, os: 
ubuntu-latest, cc: gcc }
+                    - { pyver: "3.8", builder: unix_build.py, os: 
ubuntu-latest, cc: clang }
+                    - { pyver: "3.10", builder: setup.py, os: ubuntu-latest, 
cc: gcc }
 
-                    - { pyver: 3.8, builder: setup.py, os: macos-latest, cc: 
clang }
+                    - { pyver: "3.10", builder: setup.py, os: macos-latest, 
cc: clang }
 
-                    - { pyver: 3.8, builder: setup.py, os: windows-latest, cc: 
cl }
+                    - { pyver: "3.10", builder: setup.py, os: windows-2019, 
cc: cl }
 
         steps:
             - name: Checkout source code
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/.github/workflows/win-ci.py 
new/html5-parser-0.4.11/.github/workflows/win-ci.py
--- old/html5-parser-0.4.10/.github/workflows/win-ci.py 2021-09-22 
09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/.github/workflows/win-ci.py 2023-04-12 
07:07:46.000000000 +0200
@@ -16,10 +16,10 @@
 import tarfile
 import time
 
-ZLIB = "http://zlib.net/zlib-{}.tar.xz".format("1.2.11")
+ZLIB = "http://zlib.net/zlib-{}.tar.xz".format("1.2.13")
 LIBXML2 = "ftp://xmlsoft.org/libxml2/libxml2-{}.tar.gz".format('2.9.4')
 LIBXSLT = "ftp://xmlsoft.org/libxml2/libxslt-{}.tar.gz".format('1.1.28')
-LXML = 
"https://files.pythonhosted.org/packages/c5/2f/a0d8aa3eee6d53d5723d89e1fc32eee11e76801b424e30b55c7aa6302b01/lxml-4.6.1.tar.gz"
  # noqa
+LXML = 
"https://files.pythonhosted.org/packages/06/5a/e11cad7b79f2cf3dd2ff8f81fa8ca667e7591d3d8451768589996b65dec1/lxml-4.9.2.tar.gz"
  # noqa
 SW = os.path.abspath('sw')
 PYTHON = os.path.abspath(sys.executable)
 os.environ['SW'] = SW
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/MANIFEST.in 
new/html5-parser-0.4.11/MANIFEST.in
--- old/html5-parser-0.4.10/MANIFEST.in 2021-09-22 09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/MANIFEST.in 2023-04-12 07:07:46.000000000 +0200
@@ -1,5 +1,5 @@
 exclude *.py
-include setup.py build.py run_tests.py gen*.py win-ci.py
+include setup.py unix_build.py run_tests.py gen*.py win-ci.py
 include LICENSE README.rst
 include gumbo/*.c gumbo/*.h gumbo/*.py gumbo/*.rl
 include src/*.c src/*.h
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/build.py 
new/html5-parser-0.4.11/build.py
--- old/html5-parser-0.4.10/build.py    2021-09-22 09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/build.py    1970-01-01 01:00:00.000000000 +0100
@@ -1,300 +0,0 @@
-#!/usr/bin/env python
-# vim:fileencoding=utf-8
-# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
-
-from __future__ import (absolute_import, division, print_function, 
unicode_literals)
-
-import argparse
-import errno
-import glob
-import os
-import re
-import shlex
-import shutil
-import subprocess
-import sys
-import sysconfig
-from collections import namedtuple
-from copy import deepcopy
-from itertools import chain
-
-self_path = os.path.abspath(__file__)
-base = os.path.dirname(self_path)
-build_dir = os.path.join(base, 'build', 'custom')
-freeze_dir = os.path.join(base, 'build', 'html5_parser')
-_plat = sys.platform.lower()
-isosx = 'darwin' in _plat
-iswindows = hasattr(sys, 'getwindowsversion')
-is_ci = os.environ.get('CI') == 'true'
-Env = namedtuple('Env', 'cc cflags ldflags linker debug cc_name cc_ver')
-PKGCONFIG = os.environ.get('PKGCONFIG_EXE', 'pkg-config')
-with open(os.path.join(base, 'src/python-wrapper.c'), 'rb') as f:
-    raw = f.read().decode('utf-8')
-version = tuple(
-    map(
-        int, (
-            re.search(r'^#define MAJOR (\d+)', raw, 
flags=re.MULTILINE).group(1), re.search(
-                r'^#define MINOR (\d+)', raw, flags=re.MULTILINE).group(1), 
re.search(
-                    r'^#define PATCH (\d+)', raw, 
flags=re.MULTILINE).group(1), )))
-
-
-def safe_makedirs(path):
-    try:
-        os.makedirs(path)
-    except EnvironmentError as err:
-        if err.errno != errno.EEXIST:
-            raise
-
-
-def add_python_flags(env, return_libs=False):
-    env.cflags.extend('-I' + sysconfig.get_path(x) for x in 'include 
platinclude'.split())
-    libs = []
-    libs += sysconfig.get_config_var('LIBS').split()
-    libs += sysconfig.get_config_var('SYSLIBS').split()
-    fw = sysconfig.get_config_var('PYTHONFRAMEWORK')
-    if fw:
-        for var in 'data include stdlib'.split():
-            val = sysconfig.get_path(var)
-            if val and '/{}.framework'.format(fw) in val:
-                fdir = val[:val.index('/{}.framework'.format(fw))]
-                if os.path.isdir(os.path.join(fdir, 
'{}.framework'.format(fw))):
-                    framework_dir = fdir
-                    break
-        else:
-            raise SystemExit('Failed to find Python framework')
-        libs.append(os.path.join(framework_dir, 
sysconfig.get_config_var('LDLIBRARY')))
-    else:
-        libs += ['-L' + sysconfig.get_config_var('LIBDIR')]
-        libs += ['-lpython' + sysconfig.get_config_var('VERSION') + 
getattr(sys, 'abiflags', '')]
-        libs += sysconfig.get_config_var('LINKFORSHARED').split()
-    env.ldflags.extend(libs)
-    return libs if return_libs else env
-
-
-def pkg_config(pkg, *args):
-    try:
-        val = subprocess.check_output([PKGCONFIG, pkg] + 
list(args)).decode('utf-8')
-    except EnvironmentError as err:
-        if err.errno == errno.ENOENT:
-            raise SystemExit('pkg-config is required to build html5-parser')
-        raise
-    return list(filter(None, map(str, shlex.split(val))))
-
-
-def env_var(which, default='', split=os.pathsep):
-    val = str(os.environ.get(which, default))
-    if not split:
-        return val
-    return list(filter(None, val.split(split)))
-
-
-def include_dirs():
-    if 'LIBXML_INCLUDE_DIRS' in os.environ:
-        return env_var('LIBXML_INCLUDE_DIRS')
-    return [x[2:] for x in pkg_config('libxml-2.0', '--cflags-only-I')]
-
-
-def libraries():
-    if iswindows:
-        return env_var('LIBXML_LIBS', 'libxml2')
-    if 'LIBXML_LIBS' in os.environ:
-        return env_var('LIBXML_LIBS')
-    return [x[2:] for x in pkg_config('libxml-2.0', '--libs-only-l')]
-
-
-def library_dirs():
-    if 'LIBXML_LIB_DIRS' in os.environ:
-        return env_var('LIBXML_LIB_DIRS')
-    return [x[2:] for x in pkg_config('libxml-2.0', '--libs-only-L')]
-
-
-def cc_version():
-    cc = os.environ.get('CC', 'gcc')
-    raw = subprocess.check_output([cc, '-dM', '-E', '-'], 
stdin=open(os.devnull, 'rb'))
-    m = re.search(br'^#define __clang__ 1', raw, flags=re.M)
-    cc_name = 'gcc' if m is None else 'clang'
-    ver = int(re.search(br'#define __GNUC__ (\d+)', raw, 
flags=re.M).group(1)), int(
-        re.search(br'#define __GNUC_MINOR__ (\d+)', raw, flags=re.M).group(1))
-    return cc, ver, cc_name
-
-
-def get_sanitize_args(cc, ccver):
-    sanitize_args = set()
-    if cc == 'gcc' and ccver < (4, 8):
-        return sanitize_args
-    sanitize_args.add('-fno-omit-frame-pointer')
-    sanitize_args.add('-fsanitize=address')
-    if (cc == 'gcc' and ccver >= (5, 0)) or (cc == 'clang' and not isosx):
-        # clang on macOS does not support -fsanitize=undefined
-        sanitize_args.add('-fsanitize=undefined')
-        # if cc == 'gcc' or (cc == 'clang' and ccver >= (4, 2)):
-        #     sanitize_args.add('-fno-sanitize-recover=all')
-    return sanitize_args
-
-
-def init_env(debug=False, sanitize=False, native_optimizations=False, 
add_python=True):
-    native_optimizations = (native_optimizations and not sanitize and not 
debug)
-    cc, ccver, cc_name = cc_version()
-    stack_protector = '-fstack-protector'
-    if ccver >= (4, 9) and cc_name == 'gcc':
-        stack_protector += '-strong'
-    missing_braces = ''
-    if ccver < (5, 2) and cc_name == 'gcc':
-        missing_braces = '-Wno-missing-braces'
-    optimize = '-ggdb' if debug or sanitize else '-O3'
-    sanitize_args = get_sanitize_args(cc_name, ccver) if sanitize else set()
-    cflags = os.environ.get(
-        'OVERRIDE_CFLAGS', (
-            '-Wextra -Wno-missing-field-initializers -Wall -std=c99 
-fvisibility=hidden'
-            ' -pedantic-errors -Werror {} {} -D{}DEBUG -fwrapv {} {} -pipe 
{}').format(
-                optimize, ' '.join(sanitize_args), ('' if debug else 'N'), 
stack_protector,
-                missing_braces, '-march=native' if native_optimizations else 
''))
-    libxml_cflags = pkg_config('libxml-2.0', '--cflags')
-    cflags = shlex.split(cflags) + libxml_cflags + 
shlex.split(sysconfig.get_config_var('CCSHARED'))
-    ldflags = os.environ.get(
-        'OVERRIDE_LDFLAGS', '-Wall -shared ' + ' '.join(sanitize_args) + ('' 
if debug else ' -O3'))
-    libxml_ldflags = pkg_config('libxml-2.0', '--libs')
-    ldflags = shlex.split(ldflags) + libxml_ldflags
-    cflags += shlex.split(os.environ.get('CFLAGS', ''))
-    ldflags += shlex.split(os.environ.get('LDFLAGS', ''))
-    cflags.append('-pthread')
-    ans = Env(cc, cflags, ldflags, cc, debug, cc_name, ccver)
-    return add_python_flags(ans) if add_python else ans
-
-
-def run_tool(cmd):
-    if hasattr(cmd, 'lower'):
-        cmd = shlex.split(cmd)
-    print(' '.join(cmd))
-    p = subprocess.Popen(cmd)
-    ret = p.wait()
-    if ret != 0:
-        raise SystemExit(ret)
-
-
-def newer(dest, *sources):
-    try:
-        dtime = os.path.getmtime(dest)
-    except EnvironmentError:
-        return True
-    for s in chain(sources, (self_path, )):
-        if os.path.getmtime(s) >= dtime:
-            return True
-    return False
-
-
-def find_c_files(src_dir):
-    ans, headers = [], []
-    for x in sorted(os.listdir(src_dir)):
-        ext = os.path.splitext(x)[1]
-        if ext == '.c' and not x.endswith('-check.c'):
-            ans.append(os.path.join(src_dir, x))
-        elif ext == '.h':
-            headers.append(os.path.join(src_dir, x))
-    ans.sort(key=os.path.getmtime, reverse=True)
-    return tuple(ans), tuple(headers)
-
-
-def build_obj(src, env, headers):
-    suffix = '-debug' if env.debug else ''
-    obj = os.path.join(build_dir, os.path.basename(src).rpartition('.')[0] + 
suffix + '.o')
-    if newer(obj, src, *headers):
-        cflags = list(env.cflags)
-        if src.endswith('char_ref.c'):
-            cflags.append('-Wno-unused-const-variable')
-        cmd = [env.cc] + cflags + ['-c', src] + ['-o', obj]
-        run_tool(cmd)
-    return obj
-
-
-TEST_EXE = os.path.join(build_dir, 'test')
-MEMLEAK_EXE = os.path.join(build_dir, 'mem-leak-check')
-if is_ci:
-    TEST_EXE = os.path.join(os.path.dirname(os.path.abspath(sys.executable)), 
'test-html5-parser')
-SRC_DIRS = 'src gumbo'.split()
-MOD_EXT = '.so'
-
-
-def link(objects, env):
-    dest = os.path.join(build_dir, 'html_parser' + MOD_EXT)
-    o = ['-o', dest]
-    cmd = [env.linker] + objects + o + env.ldflags
-    if newer(dest, *objects):
-        run_tool(cmd)
-    return dest
-
-
-def build(args, build_leak_check=False):
-    debug_objects = []
-    debug_env = init_env(debug=True, sanitize=True)
-    for sdir in SRC_DIRS:
-        sources, headers = find_c_files(sdir)
-        if sdir == 'src':
-            headers += ('gumbo/gumbo.h', )
-        debug_objects.extend(build_obj(c, debug_env, headers) for c in sources)
-    link(debug_objects, debug_env)
-    ldflags = add_python_flags(deepcopy(debug_env), return_libs=True)
-    if newer(TEST_EXE, *debug_objects):
-        cmd = ([debug_env.cc] + debug_env.cflags + ['test.c'] + ['-o', 
TEST_EXE] + ldflags)
-        run_tool(cmd)
-    if build_leak_check and newer(MEMLEAK_EXE, 'mem-leak-check.c', 
*debug_objects):
-        cmd = ([debug_env.cc] + debug_env.cflags + ['mem-leak-check.c'] + [
-            '-o', MEMLEAK_EXE] + debug_objects + debug_env.ldflags)
-        cmd = [x for x in cmd if x not in {'-fPIC', '-pthread', '-shared'}]
-        run_tool(cmd)
-    for mod in glob.glob(os.path.join(build_dir, '*' + MOD_EXT)):
-        shutil.copy2(mod, freeze_dir)
-    for mod in glob.glob(os.path.join('src', 'html5_parser', '*.py')):
-        shutil.copy2(mod, freeze_dir)
-
-
-TEST_COMMAND = ['run_tests.py']
-
-
-def add_python_path(env, path):
-    pp = env.get('PYTHONPATH', '')
-    to_join = filter(None, [os.path.abspath(path), pp])
-    env['PYTHONPATH'] = os.pathsep.join(to_join)
-    return env
-
-
-def option_parser():
-    p = argparse.ArgumentParser()
-    p.add_argument(
-        'action',
-        nargs='?',
-        default='test',
-        choices='build test try leak'.split(),
-        help='Action to perform (default is build)')
-    p.add_argument('rest', nargs='*')
-    return p
-
-
-def main():
-    args = option_parser().parse_args()
-    os.chdir(base)
-    safe_makedirs(build_dir), safe_makedirs(freeze_dir)
-    if args.action == 'build':
-        build(args)
-    elif args.action == 'test':
-        build(args)
-        os.environ['ASAN_OPTIONS'] = 'leak_check_at_exit=0'
-        add_python_path(os.environ, os.path.dirname(freeze_dir))
-        print('\nrunning tests...')
-        os.execlp(TEST_EXE, TEST_EXE, 'run_tests.py', *args.rest)
-    elif args.action == 'try':
-        build(args)
-        os.environ['ASAN_OPTIONS'] = 'leak_check_at_exit=0'
-        add_python_path(os.environ, os.path.dirname(freeze_dir))
-        os.execlp(
-            TEST_EXE, TEST_EXE, '-c', 'from html5_parser import *; ' + 
args.rest[0], *args.rest[1:])
-    elif args.action == 'leak':
-        build(args, build_leak_check=True)
-        os.environ['MEMLEAK_EXE'] = os.path.abspath(MEMLEAK_EXE)
-        os.environ['ASAN_OPTIONS'] = 'leak_check_at_exit=0'
-        add_python_path(os.environ, os.path.dirname(freeze_dir))
-        os.execlp(TEST_EXE, TEST_EXE, 'run_tests.py')
-
-
-if __name__ == '__main__':
-    main()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/docs/index.rst 
new/html5-parser-0.4.11/docs/index.rst
--- old/html5-parser-0.4.10/docs/index.rst      2021-09-22 09:00:47.000000000 
+0200
+++ new/html5-parser-0.4.11/docs/index.rst      2023-04-12 07:07:46.000000000 
+0200
@@ -1,7 +1,7 @@
 html5-parser
 ================
 
-|pypi| |unix_build| |windows_build|
+|pypi| |unix_build|
 
 A fast implementation of the `HTML 5 parsing spec
 <https://www.w3.org/TR/html5/syntax.html#parsing>`_ for Python. Parsing is done
@@ -11,7 +11,7 @@
 times that can be **a thirtieth** of the html5lib parse times. That is a
 speedup of **30x**. This differs, for instance, from the gumbo python bindings,
 where the initial parsing is done in C but the transformation into the final
-tree is done in python. 
+tree is done in python.
 
 
 Installation
@@ -65,7 +65,7 @@
 To use html5-parser in your code, after installing it simply do:
 
 .. code-block:: python
-    
+
     from html5_parser import parse
     from lxml.etree import tostring
     root = parse(some_html)
@@ -83,7 +83,7 @@
 
 html5-parser has the ability to parse XHTML documents as well. It will
 preserve namespace information even for namespaces not defined in the HTML 5
-spec. You can ask it to treat the input html as possibly XHTML by using the 
+spec. You can ask it to treat the input html as possibly XHTML by using the
 ``maybe_xhtml`` parameter to the :func:`html5_parser.parse` function. For 
example:
 
 .. code-block:: html
@@ -158,7 +158,7 @@
     
===============================================================================
     html5lib          |lxml              |yes               |35                
|
     soup+html5lib     |BeautifulSoup     |yes               |8                 
|
-    soup+lxml.html    |BeautifulSoup     |no                |2                 
| 
+    soup+lxml.html    |BeautifulSoup     |no                |2                 
|
 
 
 There is further potential for speedup. Currently the gumbo subsystem uses
@@ -189,7 +189,7 @@
             <html:p>xxx<ns0:svg 
xmlns:ns0="http://www.w3.org/2000/svg"><ns0:image 
xmlns:ns1="http://www.w3.org/1999/xlink" ns1:href="xxx"/></ns0:svg></html:p>
             <html:p>yyy</html:p>
         </html:body>
-    </html:html> 
+    </html:html>
 
 With **html5-parser**:
 
@@ -221,7 +221,7 @@
 <https://github.com/google/gumbo-parser>`__ which has undergone a Google
 security review and been tested on 2.5 billion pages from the Google cache. In
 addition, html5-parser passes (almost) all the tests from the html5lib test
-suite. 
+suite.
 
 Finally, html5-parser is compiled with ``-pedantic-errors -Wall -Werror`` and
 the test suite, consisting of thousands of tests, is run using the address and
@@ -232,10 +232,6 @@
     :target: https://pypi.python.org/pypi/html5-parser
     :alt: Latest version released on PyPi
 
-.. |unix_build| image:: https://api.travis-ci.org/kovidgoyal/html5-parser.svg
-    :target: http://travis-ci.org/kovidgoyal/html5-parser
-    :alt: Build status of the master branch on Unix
-
-.. |windows_build|  image:: 
https://ci.appveyor.com/api/projects/status/github/kovidgoyal/html5-parser?svg=true
-    :target: https://ci.appveyor.com/project/kovidgoyal/html5-parser
-    :alt: Build status of the master branch on Windows
+.. |unix_build| image:: 
https://github.com/kovidgoyal/html5-parser/workflows/CI/badge.svg
+    :target: 
https://github.com/kovidgoyal/html5-parser/actions?query=workflow%3ACI%22
+    :alt: Build status of the master branch
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/gumbo/parser.c 
new/html5-parser-0.4.11/gumbo/parser.c
--- old/html5-parser-0.4.10/gumbo/parser.c      2021-09-22 09:00:47.000000000 
+0200
+++ new/html5-parser-0.4.11/gumbo/parser.c      2023-04-12 07:07:46.000000000 
+0200
@@ -185,7 +185,6 @@
     {"xlink:show", "show", GUMBO_ATTR_NAMESPACE_XLINK},
     {"xlink:title", "title", GUMBO_ATTR_NAMESPACE_XLINK},
     {"xlink:type", "type", GUMBO_ATTR_NAMESPACE_XLINK},
-    {"xml:base", "base", GUMBO_ATTR_NAMESPACE_XML},
     {"xml:lang", "lang", GUMBO_ATTR_NAMESPACE_XML},
     {"xml:space", "space", GUMBO_ATTR_NAMESPACE_XML},
     {"xmlns", "xmlns", GUMBO_ATTR_NAMESPACE_XMLNS},
@@ -4304,6 +4303,12 @@
       parser, token);
 }
 
+static bool
+current_node_is_html_or_integration_point(GumboParser *parser) {
+    GumboNode *current_node = get_current_node(parser);
+    return current_node && (is_mathml_integration_point(current_node) || 
is_html_integration_point(current_node) || 
current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML);
+}
+
 // 
http://www.whatwg.org/specs/web-apps/current-work/complete/tokenization.html#parsing-main-inforeign
 static bool handle_in_foreign_content(GumboParser* parser, GumboToken* token) {
   switch (token->type) {
@@ -4349,18 +4354,8 @@
     ) {
     /* Parse error */
     parser_add_parse_error(parser, token);
-
-    GumboNode *current_node;
-    while ((current_node = get_current_node(parser)) && !(
-                is_mathml_integration_point(current_node) ||
-                is_html_integration_point(current_node) ||
-                current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
-    )) {
-        if (!pop_current_node(parser)) break;
-    }
-
-    parser->_parser_state->_reprocess_current_token = true;
-    return false;
+    while(!current_node_is_html_or_integration_point(parser) && 
pop_current_node(parser)) {}
+    return handle_html_content(parser, token);
   }
 
   if (token->type == GUMBO_TOKEN_START_TAG) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/gumbo/util.h 
new/html5-parser-0.4.11/gumbo/util.h
--- old/html5-parser-0.4.10/gumbo/util.h        2021-09-22 09:00:47.000000000 
+0200
+++ new/html5-parser-0.4.11/gumbo/util.h        2023-04-12 07:07:46.000000000 
+0200
@@ -71,9 +71,10 @@
   return (c | 0x20) >= 'a' && (c | 0x20) <= 'z';
 }
 
-#ifdef GUMBO_DEBUG
+#if defined(GUMBO_DEBUG)
 // Debug wrapper for printf, to make it easier to turn off debugging info when
 // required.
+#include <stdio.h>
 #define gumbo_debug(...) fprintf(stderr, __VA_ARGS__)
 #else
 #define gumbo_debug(...)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/publish.py 
new/html5-parser-0.4.11/publish.py
--- old/html5-parser-0.4.10/publish.py  2021-09-22 09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/publish.py  2023-04-12 07:07:46.000000000 +0200
@@ -1,4 +1,4 @@
-#!/usr/bin/env python2
+#!/usr/bin/env python
 # vim:fileencoding=utf-8
 # License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
 
@@ -15,7 +15,7 @@
 
 sys.path.insert(0, '.')
 if True:
-    from build import version
+    from unix_build import version
 del sys.path[0]
 
 VERSION = '{}.{}.{}'.format(*version)
@@ -41,7 +41,7 @@
 def build_release():
     for rem in 'dist build'.split():
         os.path.exists(rem) and shutil.rmtree(rem)
-    run(sys.executable, 'setup.py', '-q', 'sdist')
+    run(sys.executable, '-m', 'build', '-s')
 
 
 def sign_release():
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/setup.cfg 
new/html5-parser-0.4.11/setup.cfg
--- old/html5-parser-0.4.10/setup.cfg   2021-09-22 09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/setup.cfg   2023-04-12 07:07:46.000000000 +0200
@@ -1,3 +1,35 @@
+[metadata]
+name = html5-parser
+version = 0.4.11
+author = Kovid Goyal
+author_email = redac...@acme.com
+description = Fast C based HTML 5 parsing for python
+license =Apache 2.0
+url = https://html5-parser.readthedocs.io
+platforms = any
+classifiers = 
+    Development Status :: 5 - Production/Stable
+    Intended Audience :: Developers
+    License :: OSI Approved :: Apache Software License
+    Natural Language :: English
+    Operating System :: OS Independent
+    Programming Language :: Python
+    Topic :: Text Processing
+    Topic :: Text Processing :: Markup
+    Topic :: Text Processing :: Markup :: HTML
+    Topic :: Text Processing :: Markup :: XML
+
+[options]
+package_dir =
+   =src
+packages = html5_parser
+install_requires = 
+    chardet
+    lxml>=3.8.0
+
+[options.extras_require]
+soup = beautifulsoup4
+
 [flake8]
 max-line-length = 100
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/setup.py 
new/html5-parser-0.4.11/setup.py
--- old/html5-parser-0.4.10/setup.py    2021-09-22 09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/setup.py    2023-04-12 07:07:46.000000000 +0200
@@ -4,18 +4,18 @@
 
 import os
 import sys
-from distutils.command.build import build as Build
 from itertools import chain
 
-from setuptools import Extension, setup
+from setuptools import Extension, setup, Command
 
 self_path = os.path.abspath(__file__)
 base = os.path.dirname(self_path)
 sys.path.insert(0, base)
 if True:
-    from build import (
-        SRC_DIRS, find_c_files, include_dirs, libraries, library_dirs, 
version, iswindows,
-        TEST_COMMAND, add_python_path)
+    from unix_build import (
+        SRC_DIRS, TEST_COMMAND, add_python_path, find_c_files, include_dirs, 
iswindows, libraries,
+        library_dirs, version
+    )
 del sys.path[0]
 
 src_files = tuple(chain(*map(lambda x: find_c_files(x)[0], SRC_DIRS)))
@@ -24,54 +24,40 @@
     cargs.extend('-std=c99 -fvisibility=hidden'.split())
 
 
-class Test(Build):
+class Test(Command):
 
     description = "run unit tests after in-place build"
+    user_options = []
+    user_options = [
+        ('test-name=', None, 'Specify the test to run.'),
+    ]
+    sub_commands = [
+        ('build', None),
+    ]
+
+    def initialize_options(self):
+        self.test_name = ''
+
+    def finalize_options(self):
+        pass
 
     def run(self):
-        Build.run(self)
-        if self.dry_run:
-            self.announce('skipping "test" (dry run)')
-            return
+        for cmd_name in self.get_sub_commands():
+            self.run_command(cmd_name)
         import subprocess
-        env = add_python_path(os.environ.copy(), self.build_lib)
+        build = self.get_finalized_command('build')
+        env = add_python_path(os.environ.copy(), build.build_lib)
         print('\nrunning tests...')
         sys.stdout.flush()
-        ret = subprocess.Popen([sys.executable] + TEST_COMMAND, env=env).wait()
+        cmd = [sys.executable] + TEST_COMMAND
+        if self.test_name:
+            cmd.append(self.test_name)
+        ret = subprocess.Popen(cmd, env=env).wait()
         if ret != 0:
             raise SystemExit(ret)
 
 
-CLASSIFIERS = """\
-Development Status :: 5 - Production/Stable
-Intended Audience :: Developers
-License :: OSI Approved :: Apache Software License
-Natural Language :: English
-Operating System :: OS Independent
-Programming Language :: Python
-Topic :: Text Processing
-Topic :: Text Processing :: Markup
-Topic :: Text Processing :: Markup :: HTML
-Topic :: Text Processing :: Markup :: XML
-"""
-
 setup(
-    name='html5-parser',
-    version='{}.{}.{}'.format(*version),
-    author='Kovid Goyal',
-    author_email='redac...@acme.com',
-    description='Fast C based HTML 5 parsing for python',
-    license='Apache 2.0',
-    url='https://html5-parser.readthedocs.io',
-    download_url=(
-        "https://pypi.python.org/packages/source/m/html5-parser/"
-        "html5-parser-{}.{}.{}.tar.gz".format(*version)),
-    classifiers=[c for c in CLASSIFIERS.split("\n") if c],
-    platforms=['any'],
-    install_requires=['chardet', 'lxml>=3.8.0'],
-    extras_require={'soup': 'beautifulsoup4'},
-    packages=['html5_parser'],
-    package_dir={'': 'src'},
     cmdclass={'test': Test},
     ext_modules=[
         Extension(
@@ -80,4 +66,9 @@
             libraries=libraries(),
             library_dirs=library_dirs(),
             extra_compile_args=cargs,
+            define_macros=[
+                ('MAJOR', str(version.major)),
+                ('MINOR', str(version.minor)),
+                ('PATCH', str(version.patch))
+            ],
             sources=list(map(str, src_files)))])
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/src/html5_parser/soup.py 
new/html5-parser-0.4.11/src/html5_parser/soup.py
--- old/html5-parser-0.4.10/src/html5_parser/soup.py    2021-09-22 
09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/src/html5_parser/soup.py    2023-04-12 
07:07:46.000000000 +0200
@@ -127,7 +127,7 @@
 
 
 def parse(utf8_data, stack_size=16 * 1024, keep_doctype=False, 
return_root=True):
-    from . import html_parser
+    from html5_parser import html_parser
     bs, soup, new_tag, Comment, append, NavigableString = init_soup()
     if not isinstance(utf8_data, bytes):
         utf8_data = utf8_data.encode('utf-8')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/src/python-wrapper.c 
new/html5-parser-0.4.11/src/python-wrapper.c
--- old/html5-parser-0.4.10/src/python-wrapper.c        2021-09-22 
09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/src/python-wrapper.c        2023-04-12 
07:07:46.000000000 +0200
@@ -13,10 +13,6 @@
 #include "as-libxml.h"
 #include "as-python-tree.h"
 
-#define MAJOR 0
-#define MINOR 4
-#define PATCH 10
-
 static char *NAME =  "libxml2:xmlDoc";
 static char *DESTRUCTOR = "destructor:xmlFreeDoc";
 
@@ -96,12 +92,6 @@
             return NULL;
         }
     }
-    if (fragment_namespace != GUMBO_NAMESPACE_HTML) {
-        // causes infinite loops in gumbo, enable the non html fragment 
context tests
-        // in html5lib_adapter.py to trigger
-        PyErr_SetString(PyExc_KeyError, "Fragment parsing with non-HTML 
namespaces is not supported");
-        return NULL;
-    }
     doc = parse_with_options(buffer, (size_t)sz, &opts, context, 
fragment_namespace);
     if (!doc) return NULL;
     return encapsulate(doc);
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.10/test/html5lib_adapter.py 
new/html5-parser-0.4.11/test/html5lib_adapter.py
--- old/html5-parser-0.4.10/test/html5lib_adapter.py    2021-09-22 
09:00:47.000000000 +0200
+++ new/html5-parser-0.4.11/test/html5lib_adapter.py    2023-04-12 
07:07:46.000000000 +0200
@@ -89,13 +89,16 @@
         add(level, '<', ns, name, '>')
         return ns + name
 
-    def serialize_attr(name, val, level):
+    def serialize_attr_name(name):
         ns = ''
         if name.startswith('{'):
             ns, name = name[1:].rpartition('}')[::2]
             ns = NAMESPACE_PREFIXES.get(ns, ns)
+        return ns + name
+
+    def serialize_attr(name, val, level):
         level += 2
-        add(level, ns, name, '=', '"', val, '"')
+        add(level, serialize_attr_name(name), '=', '"', val, '"')
 
     def serialize_text(text, level=0):
         add((level + 2) if level else 1, '"', text, '"')
@@ -105,7 +108,7 @@
 
     def serialize_node(node, level=1):
         name = serialize_tag(node.tag, level)
-        for attr in sorted(node.keys()):
+        for attr in sorted(node.keys(), key=serialize_attr_name):
             serialize_attr(attr, node.get(attr), level)
         if name == 'template':
             level += 2
@@ -128,6 +131,8 @@
                 serialize_comment(node)
             else:
                 serialize_node(node)
+            if node.tail:
+                serialize_text(node.tail)
     else:
         for c in root.itersiblings(preceding=True):
             serialize_comment(c)
@@ -176,6 +181,10 @@
             return (
                 'gumbo and html5lib differ on <menuitem> parsing'
                 ' and I cannot be bothered to figure out who is right')
+        if 'search-element' in test_name:
+            return (
+                'No idea what the <search> element is. In any case the tests only differ in'
+                ' indentation, so skipping')
         noscript = re.search(r'^\| +<noscript>$', expected, flags=re.MULTILINE)
         if noscript is not None:
             return '<noscript> is always parsed with scripting off by gumbo'
@@ -184,8 +193,6 @@
         for line in errors:
             if 'expected-doctype-name-but' in line or 'unknown-doctype' in line:
                 return 'gumbo auto-corrects malformed doctypes'
-        if fragment_context and ':' in fragment_context:
-            return 'Fragment parsing with non HTML contexts not supported'
 
     def implementation(self, fragment_context, html, expected, errors, test_name):
         if fragment_context:
@@ -195,15 +202,16 @@
             raise unittest.SkipTest(bad)
 
         root = parse(
-            html, namespace_elements=True, sanitize_names=False, fragment_context=fragment_context)
+            html, namespace_elements=True, sanitize_names=False,
+            fragment_context=fragment_context)
         output = serialize_construction_output(root, fragment_context=fragment_context)
+        from lxml.etree import tostring
 
-        # html5lib doesn't yet support the template tag, but it appears in the
-        # tests with the expectation that the template contents will be under the
-        # word 'contents', so we need to reformat that string a bit.
-        # expected = reformatTemplateContents(expected)
-
-        error_msg = '\n'.join(['\n\nInput:', html, '\nExpected:', expected, '\nReceived:', output])
+        error_msg = '\n'.join([
+            '\n\nTest name:', test_name, '\nInput:', html, '\nExpected:', expected,
+            '\nReceived:', output,
+            '\nOutput tree:', tostring(root, encoding='unicode'),
+        ])
         self.ae(expected, output, error_msg + '\n')
         # TODO: Check error messages, when there's full error support.
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.10/unix_build.py new/html5-parser-0.4.11/unix_build.py
--- old/html5-parser-0.4.10/unix_build.py       1970-01-01 01:00:00.000000000 +0100
+++ new/html5-parser-0.4.11/unix_build.py       2023-04-12 07:07:46.000000000 +0200
@@ -0,0 +1,305 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
+
+from __future__ import (absolute_import, division, print_function, unicode_literals)
+
+import argparse
+import errno
+import glob
+import os
+import re
+import shlex
+import shutil
+import subprocess
+import sys
+import sysconfig
+from collections import namedtuple
+from copy import deepcopy
+from itertools import chain
+try:
+    import configparser
+except ImportError:
+    import ConfigParser as configparser
+
+self_path = os.path.abspath(__file__)
+base = os.path.dirname(self_path)
+build_dir = os.path.join(base, 'build', 'custom')
+freeze_dir = os.path.join(base, 'build', 'html5_parser')
+_plat = sys.platform.lower()
+isosx = 'darwin' in _plat
+iswindows = hasattr(sys, 'getwindowsversion')
+is_ci = os.environ.get('CI') == 'true'
+Env = namedtuple('Env', 'cc cflags ldflags linker debug cc_name cc_ver')
+PKGCONFIG = os.environ.get('PKGCONFIG_EXE', 'pkg-config')
+cfg = configparser.ConfigParser()
+cfg.read(os.path.join(base, 'setup.cfg'))
+version = namedtuple('Version', 'major minor patch')(
+    *map(int, cfg.get('metadata', 'version').split('.')))
+
+
+def safe_makedirs(path):
+    try:
+        os.makedirs(path)
+    except EnvironmentError as err:
+        if err.errno != errno.EEXIST:
+            raise
+
+
+def add_python_flags(env, return_libs=False):
+    env.cflags.extend('-I' + sysconfig.get_path(x) for x in 'include platinclude'.split())
+    libs = []
+    libs += sysconfig.get_config_var('LIBS').split()
+    libs += sysconfig.get_config_var('SYSLIBS').split()
+    fw = sysconfig.get_config_var('PYTHONFRAMEWORK')
+    if fw:
+        for var in 'data include stdlib'.split():
+            val = sysconfig.get_path(var)
+            if val and '/{}.framework'.format(fw) in val:
+                fdir = val[:val.index('/{}.framework'.format(fw))]
+                if os.path.isdir(os.path.join(fdir, '{}.framework'.format(fw))):
+                    framework_dir = fdir
+                    break
+        else:
+            raise SystemExit('Failed to find Python framework')
+        libs.append(os.path.join(framework_dir, sysconfig.get_config_var('LDLIBRARY')))
+    else:
+        libs += ['-L' + sysconfig.get_config_var('LIBDIR')]
+        libs += ['-lpython' + sysconfig.get_config_var('VERSION') + getattr(sys, 'abiflags', '')]
+        libs += sysconfig.get_config_var('LINKFORSHARED').split()
+    env.ldflags.extend(libs)
+    return libs if return_libs else env
+
+
+def pkg_config(pkg, *args):
+    try:
+        val = subprocess.check_output([PKGCONFIG, pkg] + list(args)).decode('utf-8')
+    except EnvironmentError as err:
+        if err.errno == errno.ENOENT:
+            raise SystemExit('pkg-config is required to build html5-parser')
+        raise
+    return list(filter(None, map(str, shlex.split(val))))
+
+
+def env_var(which, default='', split=os.pathsep):
+    val = str(os.environ.get(which, default))
+    if not split:
+        return val
+    return list(filter(None, val.split(split)))
+
+
+def include_dirs():
+    if 'LIBXML_INCLUDE_DIRS' in os.environ:
+        return env_var('LIBXML_INCLUDE_DIRS')
+    return [x[2:] for x in pkg_config('libxml-2.0', '--cflags-only-I')]
+
+
+def libraries():
+    if iswindows:
+        return env_var('LIBXML_LIBS', 'libxml2')
+    if 'LIBXML_LIBS' in os.environ:
+        return env_var('LIBXML_LIBS')
+    return [x[2:] for x in pkg_config('libxml-2.0', '--libs-only-l')]
+
+
+def library_dirs():
+    if 'LIBXML_LIB_DIRS' in os.environ:
+        return env_var('LIBXML_LIB_DIRS')
+    return [x[2:] for x in pkg_config('libxml-2.0', '--libs-only-L')]
+
+
+def cc_version():
+    cc = os.environ.get('CC', 'gcc')
+    raw = subprocess.check_output([cc, '-dM', '-E', '-'], stdin=open(os.devnull, 'rb'))
+    m = re.search(br'^#define __clang__ 1', raw, flags=re.M)
+    cc_name = 'gcc' if m is None else 'clang'
+    ver = int(re.search(br'#define __GNUC__ (\d+)', raw, flags=re.M).group(1)), int(
+        re.search(br'#define __GNUC_MINOR__ (\d+)', raw, flags=re.M).group(1))
+    return cc, ver, cc_name
+
+
+def get_sanitize_args(cc, ccver):
+    sanitize_args = set()
+    if cc == 'gcc' and ccver < (4, 8):
+        return sanitize_args
+    sanitize_args.add('-fno-omit-frame-pointer')
+    sanitize_args.add('-fsanitize=address')
+    if (cc == 'gcc' and ccver >= (5, 0)) or (cc == 'clang' and not isosx):
+        # clang on macOS does not support -fsanitize=undefined
+        sanitize_args.add('-fsanitize=undefined')
+        # if cc == 'gcc' or (cc == 'clang' and ccver >= (4, 2)):
+        #     sanitize_args.add('-fno-sanitize-recover=all')
+    return sanitize_args
+
+
+def init_env(debug=False, sanitize=False, native_optimizations=False, add_python=True):
+    native_optimizations = (native_optimizations and not sanitize and not debug)
+    cc, ccver, cc_name = cc_version()
+    stack_protector = '-fstack-protector'
+    if ccver >= (4, 9) and cc_name == 'gcc':
+        stack_protector += '-strong'
+    missing_braces = ''
+    if ccver < (5, 2) and cc_name == 'gcc':
+        missing_braces = '-Wno-missing-braces'
+    optimize = '-ggdb' if debug or sanitize else '-O3'
+    sanitize_args = get_sanitize_args(cc_name, ccver) if sanitize else set()
+    cflags = os.environ.get(
+        'OVERRIDE_CFLAGS', (
+            '-Wextra -Wno-missing-field-initializers -Wall -std=c99 -fvisibility=hidden'
+            ' -pedantic-errors -Werror {} {} -D{}DEBUG -fwrapv {} {} -pipe {}').format(
+                optimize, ' '.join(sanitize_args), ('' if debug else 'N'), stack_protector,
+                missing_braces, '-march=native' if native_optimizations else ''))
+    libxml_cflags = pkg_config('libxml-2.0', '--cflags')
+    cflags = shlex.split(cflags) + libxml_cflags + shlex.split(sysconfig.get_config_var('CCSHARED'))
+    ldflags = os.environ.get(
+        'OVERRIDE_LDFLAGS', '-Wall -shared ' + ' '.join(sanitize_args) + ('' if debug else ' -O3'))
+    libxml_ldflags = pkg_config('libxml-2.0', '--libs')
+    ldflags = shlex.split(ldflags) + libxml_ldflags
+    cflags += shlex.split(os.environ.get('CFLAGS', ''))
+    ldflags += shlex.split(os.environ.get('LDFLAGS', ''))
+    cflags.append('-pthread')
+    cflags.extend((
+        '-DMAJOR=' + str(version.major),
+        '-DMINOR=' + str(version.minor),
+        '-DPATCH=' + str(version.patch),
+    ))
+    ans = Env(cc, cflags, ldflags, cc, debug, cc_name, ccver)
+    return add_python_flags(ans) if add_python else ans
+
+
+def run_tool(cmd):
+    if hasattr(cmd, 'lower'):
+        cmd = shlex.split(cmd)
+    print(' '.join(cmd))
+    p = subprocess.Popen(cmd)
+    ret = p.wait()
+    if ret != 0:
+        raise SystemExit(ret)
+
+
+def newer(dest, *sources):
+    try:
+        dtime = os.path.getmtime(dest)
+    except EnvironmentError:
+        return True
+    for s in chain(sources, (self_path, )):
+        if os.path.getmtime(s) >= dtime:
+            return True
+    return False
+
+
+def find_c_files(src_dir):
+    ans, headers = [], []
+    for x in sorted(os.listdir(src_dir)):
+        ext = os.path.splitext(x)[1]
+        if ext == '.c' and not x.endswith('-check.c'):
+            ans.append(os.path.join(src_dir, x))
+        elif ext == '.h':
+            headers.append(os.path.join(src_dir, x))
+    ans.sort(key=os.path.getmtime, reverse=True)
+    return tuple(ans), tuple(headers)
+
+
+def build_obj(src, env, headers):
+    suffix = '-debug' if env.debug else ''
+    obj = os.path.join(build_dir, os.path.basename(src).rpartition('.')[0] + suffix + '.o')
+    if newer(obj, src, *headers):
+        cflags = list(env.cflags)
+        if src.endswith('char_ref.c'):
+            cflags.append('-Wno-unused-const-variable')
+        cmd = [env.cc] + cflags + ['-c', src] + ['-o', obj]
+        run_tool(cmd)
+    return obj
+
+
+TEST_EXE = os.path.join(build_dir, 'test')
+MEMLEAK_EXE = os.path.join(build_dir, 'mem-leak-check')
+if is_ci:
+    TEST_EXE = os.path.join(os.path.dirname(os.path.abspath(sys.executable)), 'test-html5-parser')
+SRC_DIRS = 'src gumbo'.split()
+MOD_EXT = '.so'
+
+
+def link(objects, env):
+    dest = os.path.join(build_dir, 'html_parser' + MOD_EXT)
+    o = ['-o', dest]
+    cmd = [env.linker] + objects + o + env.ldflags
+    if newer(dest, *objects):
+        run_tool(cmd)
+    return dest
+
+
+def build(args, build_leak_check=False):
+    debug_objects = []
+    debug_env = init_env(debug=True, sanitize=True)
+    for sdir in SRC_DIRS:
+        sources, headers = find_c_files(sdir)
+        if sdir == 'src':
+            headers += ('gumbo/gumbo.h', )
+        debug_objects.extend(build_obj(c, debug_env, headers) for c in sources)
+    link(debug_objects, debug_env)
+    ldflags = add_python_flags(deepcopy(debug_env), return_libs=True)
+    if newer(TEST_EXE, *debug_objects):
+        cmd = ([debug_env.cc] + debug_env.cflags + ['test.c'] + ['-o', TEST_EXE] + ldflags)
+        run_tool(cmd)
+    if build_leak_check and newer(MEMLEAK_EXE, 'mem-leak-check.c', *debug_objects):
+        cmd = ([debug_env.cc] + debug_env.cflags + ['mem-leak-check.c'] + [
+            '-o', MEMLEAK_EXE] + debug_objects + debug_env.ldflags)
+        cmd = [x for x in cmd if x not in {'-fPIC', '-pthread', '-shared'}]
+        run_tool(cmd)
+    for mod in glob.glob(os.path.join(build_dir, '*' + MOD_EXT)):
+        shutil.copy2(mod, freeze_dir)
+    for mod in glob.glob(os.path.join('src', 'html5_parser', '*.py')):
+        shutil.copy2(mod, freeze_dir)
+
+
+TEST_COMMAND = ['run_tests.py']
+
+
+def add_python_path(env, path):
+    pp = env.get('PYTHONPATH', '')
+    to_join = filter(None, [os.path.abspath(path), pp])
+    env['PYTHONPATH'] = os.pathsep.join(to_join)
+    return env
+
+
+def option_parser():
+    p = argparse.ArgumentParser()
+    p.add_argument(
+        'action',
+        nargs='?',
+        default='test',
+        choices='build test try leak'.split(),
+        help='Action to perform (default is build)')
+    p.add_argument('rest', nargs='*')
+    return p
+
+
+def main():
+    args = option_parser().parse_args()
+    os.chdir(base)
+    safe_makedirs(build_dir), safe_makedirs(freeze_dir)
+    if args.action == 'build':
+        build(args)
+    elif args.action == 'test':
+        build(args)
+        os.environ['ASAN_OPTIONS'] = 'leak_check_at_exit=0'
+        add_python_path(os.environ, os.path.dirname(freeze_dir))
+        print('\nrunning tests...')
+        os.execlp(TEST_EXE, TEST_EXE, 'run_tests.py', *args.rest)
+    elif args.action == 'try':
+        build(args)
+        os.environ['ASAN_OPTIONS'] = 'leak_check_at_exit=0'
+        add_python_path(os.environ, os.path.dirname(freeze_dir))
+        os.execlp(
+            TEST_EXE, TEST_EXE, '-c', 'from html5_parser import *; ' + args.rest[0], *args.rest[1:])
+    elif args.action == 'leak':
+        build(args, build_leak_check=True)
+        os.environ['MEMLEAK_EXE'] = os.path.abspath(MEMLEAK_EXE)
+        os.environ['ASAN_OPTIONS'] = 'leak_check_at_exit=0'
+        add_python_path(os.environ, os.path.dirname(freeze_dir))
+        os.execlp(TEST_EXE, TEST_EXE, 'run_tests.py')
+
+
+if __name__ == '__main__':
+    main()

Reply via email to