commit python-html5-parser for openSUSE:Factory

Source-Sync Fri, 15 Oct 2021 14:04:59 -0700

Script 'mail_helper' called by obssrc
Hello community,

here is the log from the commit of package python-html5-parser for 
openSUSE:Factory checked in at 2021-10-15 23:04:10
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html5-parser (Old)
 and      /work/SRC/openSUSE:Factory/.python-html5-parser.new.1890 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-html5-parser"

Fri Oct 15 23:04:10 2021 rev:11 rq:925386 version:0.4.10

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html5-parser/python-html5-parser.changes  2021-06-02 22:12:28.168119085 +0200
+++ /work/SRC/openSUSE:Factory/.python-html5-parser.new.1890/python-html5-parser.changes        2021-10-15 23:04:45.358131358 +0200
@@ -1,0 +2,8 @@
+Fri Oct 15 08:27:05 UTC 2021 - ecsos <ec...@opensuse.org>
+
+- Update to 0.4.10
+  No changelog from upstream.
+  See instead here:
+  https://github.com/kovidgoyal/html5-parser/compare/v0.4.9...v0.4.10?diff=unified&name=v0.4.10
+
+-------------------------------------------------------------------

Old:
----
  python-html5-parser-0.4.9.tar.gz

New:
----
  python-html5-parser-0.4.10.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-html5-parser.spec ++++++
--- /var/tmp/diff_new_pack.TAqKae/_old  2021-10-15 23:04:45.798131672 +0200
+++ /var/tmp/diff_new_pack.TAqKae/_new  2021-10-15 23:04:45.802131674 +0200
@@ -18,7 +18,7 @@
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-html5-parser
-Version:        0.4.9
+Version:        0.4.10
 Release:        0
 Summary:        C based HTML 5 parsing for Python
 License:        Apache-2.0
@@ -26,6 +26,7 @@
 URL:            https://github.com/kovidgoyal/html5-parser
 Source:         https://github.com/kovidgoyal/html5-parser/archive/v%{version}/%{name}-%{version}.tar.gz
 BuildRequires:  %{python_module beautifulsoup4}
+BuildRequires:  %{python_module chardet}
 BuildRequires:  %{python_module devel}
 BuildRequires:  %{python_module lxml >= 3.8.0}
 BuildRequires:  %{python_module setuptools}

++++++ python-html5-parser-0.4.9.tar.gz -> python-html5-parser-0.4.10.tar.gz 
++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/.appveyor.yml 
new/html5-parser-0.4.10/.appveyor.yml
--- old/html5-parser-0.4.9/.appveyor.yml        2019-11-03 04:13:38.000000000 
+0100
+++ new/html5-parser-0.4.10/.appveyor.yml       1970-01-01 01:00:00.000000000 
+0100
@@ -1,27 +0,0 @@
-os: Visual Studio 2015
-
-platform:
-    - x64
-    - x86
-
-cache:
-    - sw -> win-ci.py
-
-environment:
-    matrix:
-        - PY: 36
-
-
-build_script:
-    - ps: |
-        If ($env:Platform -Match "x86") {
-            $env:VCVARS_PLATFORM="x86"
-        } Else {
-            $env:VCVARS_PLATFORM="amd64"
-        }
-    - call "%VS140COMNTOOLS%\..\..\VC\vcvarsall.bat" %VCVARS_PLATFORM%
-    - C:/Python36-x64/python.exe win-ci.py install_deps
-    - git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests
-
-test_script:
-    - C:/Python36-x64/python.exe win-ci.py test
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/.github/workflows/ci.py 
new/html5-parser-0.4.10/.github/workflows/ci.py
--- old/html5-parser-0.4.9/.github/workflows/ci.py      1970-01-01 
01:00:00.000000000 +0100
+++ new/html5-parser-0.4.10/.github/workflows/ci.py     2021-09-22 
09:00:47.000000000 +0200
@@ -0,0 +1,55 @@
+#!/usr/bin/env python
+# vim:fileencoding=utf-8
+# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
+
+from __future__ import absolute_import, division, print_function, unicode_literals
+
+import os
+import shlex
+import subprocess
+import sys
+
+
+is_macos = 'darwin' in sys.platform.lower()
+
+
+def run(*a):
+    if len(a) == 1:
+        a = shlex.split(a[0])
+    ret = subprocess.Popen(a).wait()
+    if ret != 0:
+        print('Running:', a, 'failed', file=sys.stderr)
+        raise SystemExit(ret)
+
+
+def install_deps():
+    if is_macos:
+        pass
+    else:
+        run('sudo apt-get update')
+        run('sudo apt-get install -y libxml2-dev libxslt-dev')
+    deps = 'chardet lxml beautifulsoup4'.split()
+    if sys.version_info.major == 2:
+        deps.append('BeautifulSoup')
+    run(sys.executable, '-m', 'pip', 'install', '--no-binary', 'lxml', *deps)
+    run(sys.executable, '-c', 'from lxml import etree; print(etree)')
+
+
+def main():
+    which = sys.argv[-1]
+    if hasattr(sys, 'getwindowsversion'):
+        run(sys.executable, os.path.join(os.path.dirname(__file__), 
'win-ci.py'), which)
+        return
+    if which == 'install':
+        install_deps()
+    elif which == 'test':
+        builder = os.environ['BUILDER']
+        run(sys.executable, builder, 'test')
+        if builder == 'build.py':
+            run(sys.executable, builder, 'leak')
+    else:
+        raise SystemExit('Unknown action:', which)
+
+
+if __name__ == '__main__':
+    main()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/.github/workflows/ci.yml 
new/html5-parser-0.4.10/.github/workflows/ci.yml
--- old/html5-parser-0.4.9/.github/workflows/ci.yml     1970-01-01 
01:00:00.000000000 +0100
+++ new/html5-parser-0.4.10/.github/workflows/ci.yml    2021-09-22 
09:00:47.000000000 +0200
@@ -0,0 +1,49 @@
+name: CI
+on: [push, pull_request]
+env:
+    CI: 'true'
+    LC_ALL: en_US.UTF-8
+    LANG: en_US.UTF-8
+
+jobs:
+    test:
+        name: Test on ${{ matrix.os }} (python=${{ matrix.pyver }} cc=${{ 
matrix.cc }} builder=${{ matrix.builder }})
+        runs-on: ${{ matrix.os }}
+        env:
+            CC: ${{ matrix.cc }}
+            BUILDER: ${{ matrix.builder }}
+        strategy:
+            matrix:
+                include:
+                    - { pyver: 2.7, builder: build.py, os: ubuntu-latest, cc: 
gcc }
+                    - { pyver: 2.7, builder: build.py, os: ubuntu-latest, cc: 
clang }
+                    - { pyver: 3.6, builder: build.py, os: ubuntu-latest, cc: 
gcc }
+                    - { pyver: 3.6, builder: build.py, os: ubuntu-latest, cc: 
clang }
+                    - { pyver: 3.8, builder: setup.py, os: ubuntu-latest, cc: 
gcc }
+
+                    - { pyver: 3.8, builder: setup.py, os: macos-latest, cc: 
clang }
+
+                    - { pyver: 3.8, builder: setup.py, os: windows-latest, cc: 
cl }
+
+        steps:
+            - name: Checkout source code
+              uses: actions/checkout@master
+              with:
+                fetch-depth: 10
+
+            - name: Set up Python ${{ matrix.pyver }}
+              uses: actions/setup-python@master
+              with:
+                python-version: ${{ matrix.pyver }}
+
+            - name: Install dependencies
+              run:
+                python .github/workflows/ci.py install
+
+            - name: Download html5lib tests
+              run:
+                git clone --depth 1 
https://github.com/html5lib/html5lib-tests.git test/html5lib-tests
+
+            - name: Run tests
+              run:
+                python .github/workflows/ci.py test
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/.github/workflows/win-ci.py 
new/html5-parser-0.4.10/.github/workflows/win-ci.py
--- old/html5-parser-0.4.9/.github/workflows/win-ci.py  1970-01-01 
01:00:00.000000000 +0100
+++ new/html5-parser-0.4.10/.github/workflows/win-ci.py 2021-09-22 
09:00:47.000000000 +0200
@@ -0,0 +1,307 @@
+#!/usr/bin/env python3
+# vim:fileencoding=utf-8
+# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
+
+from __future__ import print_function
+
+import errno
+import glob
+import io
+import os
+import pipes
+import shlex
+import shutil
+import subprocess
+import sys
+import tarfile
+import time
+
+ZLIB = "http://zlib.net/zlib-{}.tar.xz".format("1.2.11")
+LIBXML2 = "ftp://xmlsoft.org/libxml2/libxml2-{}.tar.gz".format('2.9.4')
+LIBXSLT = "ftp://xmlsoft.org/libxml2/libxslt-{}.tar.gz".format('1.1.28')
+LXML = "https://files.pythonhosted.org/packages/c5/2f/a0d8aa3eee6d53d5723d89e1fc32eee11e76801b424e30b55c7aa6302b01/lxml-4.6.1.tar.gz"  # noqa
+SW = os.path.abspath('sw')
+PYTHON = os.path.abspath(sys.executable)
+os.environ['SW'] = SW
+os.environ['PYTHONPATH'] = os.path.join(SW, r'python\Lib\site-packages')
+plat = 'amd64' if sys.maxsize > 2**32 else 'x86'
+
+
+def printf(*a, **k):
+    print(*a, **k)
+    sys.stdout.flush()
+
+
+def walk(path='.'):
+    for dirpath, dirnames, filenames in os.walk(path):
+        for f in filenames:
+            yield os.path.join(dirpath, f)
+
+
+def download_file(url):
+    for i in range(5):
+        try:
+            printf('Downloading', url)
+            try:
+                return subprocess.check_output(['curl.exe', '-fSL', url])
+            except FileNotFoundError:
+                try:
+                    from urllib.request import urlopen
+                except ImportError:
+                    from urllib import urlopen
+                return urlopen(url).read()
+        except subprocess.CalledProcessError:
+            time.sleep(1)
+    raise SystemExit('Failed to download: {}'.format(url))
+
+
+def split(x):
+    x = x.replace('\\', '\\\\')
+    return shlex.split(x)
+
+
+def run(*args, env=None, cwd=None):
+    if len(args) == 1 and isinstance(args[0], type('')):
+        cmd = split(args[0])
+    else:
+        cmd = args
+    printf(' '.join(pipes.quote(x) for x in cmd))
+    sys.stdout.flush()
+    if env:
+        printf('Using modified env:', env)
+        e = os.environ.copy()
+        e.update(env)
+        env = e
+    try:
+        p = subprocess.Popen(cmd, cwd=cwd, env=env)
+    except EnvironmentError as err:
+        if err.errno == errno.ENOENT:
+            raise SystemExit('Could not find the program: %s' % cmd[0])
+        raise
+    if p.wait() != 0:
+        raise SystemExit(p.returncode)
+
+
+def distutils_vcvars():
+    from distutils.msvc9compiler import find_vcvarsall, get_build_version
+    return find_vcvarsall(get_build_version())
+
+
+def remove_dups(variable):
+    old_list = variable.split(os.pathsep)
+    new_list = []
+    for i in old_list:
+        if i not in new_list:
+            new_list.append(i)
+    return os.pathsep.join(new_list)
+
+
+def query_process(cmd):
+    if plat == 'amd64' and 'PROGRAMFILES(x86)' not in os.environ:
+        os.environ['PROGRAMFILES(x86)'] = os.environ['PROGRAMFILES'] + ' (x86)'
+    result = {}
+    popen = subprocess.Popen(cmd, stdout=subprocess.PIPE,
+                             stderr=subprocess.PIPE)
+    try:
+        stdout, stderr = popen.communicate()
+        if popen.wait() != 0:
+            raise RuntimeError(stderr.decode("mbcs"))
+
+        stdout = stdout.decode("mbcs")
+        for line in stdout.splitlines():
+            if '=' not in line:
+                continue
+            line = line.strip()
+            key, value = line.split('=', 1)
+            key = key.lower()
+            if key == 'path':
+                if value.endswith(os.pathsep):
+                    value = value[:-1]
+                value = remove_dups(value)
+            result[key] = value
+
+    finally:
+        popen.stdout.close()
+        popen.stderr.close()
+    return result
+
+
+def query_vcvarsall():
+    vcvarsall = distutils_vcvars()
+    return query_process('"%s" %s & set' % (vcvarsall, plat))
+
+
+def download_and_extract(url):
+    raw = io.BytesIO(download_file(url))
+    with tarfile.open(fileobj=raw, mode='r:*') as f:
+        f.extractall()
+    for x in os.listdir('.'):
+        if os.path.isdir(x):
+            os.chdir(x)
+            return
+
+
+def ensure_dir(path):
+    try:
+        os.makedirs(path)
+    except EnvironmentError as err:
+        if err.errno != errno.EEXIST:
+            raise
+
+
+def replace_in_file(path, old, new, missing_ok=False):
+    if isinstance(old, type('')):
+        old = old.encode('utf-8')
+    if isinstance(new, type('')):
+        new = new.encode('utf-8')
+    with open(path, 'r+b') as f:
+        raw = f.read()
+        if isinstance(old, bytes):
+            nraw = raw.replace(old, new)
+        else:
+            nraw = old.sub(new, raw)
+        if raw == nraw and not missing_ok:
+            raise ValueError('Failed (pattern not found) to patch: ' + path)
+        f.seek(0), f.truncate()
+        f.write(nraw)
+
+
+def copy_headers(pattern, destdir='include'):
+    dest = os.path.join(SW, destdir)
+    ensure_dir(dest)
+    files = glob.glob(pattern)
+    for f in files:
+        dst = os.path.join(dest, os.path.basename(f))
+        if os.path.isdir(f):
+            shutil.copytree(f, dst)
+        else:
+            shutil.copy2(f, dst)
+
+
+def install_binaries(pattern, destdir='lib', fname_map=os.path.basename):
+    dest = os.path.join(SW, destdir)
+    ensure_dir(dest)
+    files = glob.glob(pattern)
+    files.sort(key=len, reverse=True)
+    if not files:
+        raise ValueError('The pattern %s did not match any actual files' % 
pattern)
+    for f in files:
+        dst = os.path.join(dest, fname_map(f))
+        shutil.copy(f, dst)
+        os.chmod(dst, 0o755)
+        if os.path.exists(f + '.manifest'):
+            shutil.copy(f + '.manifest', dst + '.manifest')
+
+
+def install_tree(src, dest_parent='include', ignore=None):
+    dest_parent = os.path.join(SW, dest_parent)
+    dst = os.path.join(dest_parent, os.path.basename(src))
+    if os.path.exists(dst):
+        shutil.rmtree(dst)
+    shutil.copytree(src, dst, symlinks=True, ignore=ignore)
+    return dst
+
+
+def pure_python():
+    run(PYTHON, '-m', 'pip', 'install', 'chardet', 'bs4', '--prefix', 
os.path.join(SW, 'python'))
+    run(PYTHON, '-c', 'import bs4; print(bs4)')
+
+
+def zlib():
+    run('nmake -f win32/Makefile.msc')
+    install_binaries('zlib1.dll*', 'bin')
+    install_binaries('zlib.lib'), install_binaries('zdll.*')
+    copy_headers('zconf.h'), copy_headers('zlib.h')
+
+
+def libxml2():
+    run(
+        *(
+            'cscript.exe configure.js include={0}/include lib={0}/lib prefix={0} zlib=yes iconv=no'.
+            format(SW.replace(os.sep, '/')).split()),
+        cwd='win32')
+    run('nmake /f Makefile.msvc', cwd='win32')
+    install_tree('include/libxml', 'include/libxml2')
+    for f in walk('.'):
+        if f.endswith('.dll'):
+            install_binaries(f, 'bin')
+        elif f.endswith('.lib'):
+            install_binaries(f)
+
+
+def libxslt():
+    run(
+        *(
+            'cscript.exe configure.js include={0}/include include={0}/include/libxml2 lib={0}/lib '
+            'prefix={0} zlib=yes iconv=no'.format(SW.replace(os.sep, '/')).split()),
+        cwd='win32')
+    replace_in_file('libxslt/win32config.h', '#define snprintf _snprintf', '')
+    for f in walk('.'):
+        if os.path.basename(f).startswith('Makefile'):
+            replace_in_file(f, '/OPT:NOWIN98', '', missing_ok=True)
+    run('nmake /f Makefile.msvc', cwd='win32')
+    install_tree('libxslt', 'include')
+    install_tree('libexslt', 'include')
+    for f in walk('.'):
+        if f.endswith('.dll'):
+            install_binaries(f, 'bin')
+        elif f.endswith('.lib'):
+            install_binaries(f)
+
+
+def lxml():
+    replace_in_file('setupinfo.py', ", 'iconv'", '')
+    run(
+        PYTHON,
+        *(
+            'setup.py build_ext -I {0}/include;{0}/include/libxml2 -L {0}/lib'.format(
+                SW.replace(os.sep, '/')).split()))
+    run(PYTHON, 'setup.py', 'install', '--prefix', os.path.join(SW, 'python'))
+    package = glob.glob(os.path.join(SW, 'python', 'lib', 'site-packages', 
'lxml-*.egg', 'lxml'))[0]
+    os.rename(package, os.path.join(SW, 'python', 'lib', 'site-packages', 
'lxml'))
+
+
+def install_deps():
+    env = query_vcvarsall()
+    os.environ.update(env)
+    print(PYTHON)
+    for x in 'build lib bin include python/Lib/site-packages'.split():
+        ensure_dir(os.path.join(SW, x))
+    os.chdir(os.path.join(SW, 'build'))
+    base = os.getcwd()
+    pure_python()
+    for name in 'zlib libxml2 libxslt lxml'.split():
+        os.chdir(base)
+        if os.path.exists(name):
+            continue
+        os.mkdir(name), os.chdir(name)
+        try:
+            download_and_extract(globals()[name.upper()])
+            globals()[name]()
+        except Exception:
+            os.chdir(base)
+            shutil.rmtree(name)
+            raise
+
+
+def build():
+    env = query_vcvarsall()
+    os.environ.update(env)
+    os.environ.update(dict(
+        LIBXML_INCLUDE_DIRS=r'{0}\include;{0}\include\libxml2'.format(SW),
+        LIBXML_LIB_DIRS=r'{0}\lib'.format(SW),
+        HTML5_PARSER_DLL_DIR=os.path.join(SW, 'bin'),
+    ))
+    print('Using PYTHONPATH:', os.environ['PYTHONPATH'])
+    run(PYTHON, 'setup.py', 'test')
+
+
+def main():
+    if sys.argv[-1] == 'install':
+        install_deps()
+    else:
+        build()
+
+
+if __name__ == '__main__':
+    main()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/.travis.yml 
new/html5-parser-0.4.10/.travis.yml
--- old/html5-parser-0.4.9/.travis.yml  2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/.travis.yml 1970-01-01 01:00:00.000000000 +0100
@@ -1,75 +0,0 @@
-env:
-    global:
-        - PYTHONHASHSEED=random
-
-matrix:
-    include:
-        - os: linux
-          language: python
-          python: 2.7
-          env: BUILDER=build.py CC=gcc PYTHON=python
-          group: beta
-          dist: trusty
-          sudo: false
-          addons:
-              apt:
-                  packages:
-                      - libxml2-dev
-        - os: linux
-          language: python
-          python: 2.7
-          env: BUILDER=build.py CC=clang PYTHON=python 
LSAN_OPTIONS=verbosity=1:log_threads=1
-          group: beta
-          dist: trusty
-          # See https://github.com/travis-ci/travis-ci/issues/9033
-          sudo: required
-          addons:
-              apt:
-                  packages:
-                      - libxml2-dev
-        - os: linux
-          language: python
-          python: 2.7
-          env: BUILDER=setup.py PYTHON=python
-          group: beta
-          dist: trusty
-          sudo: false
-          addons:
-              apt:
-                  packages:
-                      - libxml2-dev
-        - os: linux
-          language: python
-          python: 3.6
-          env: BUILDER=setup.py PYTHON=python
-          group: beta
-          dist: trusty
-          sudo: false
-          addons:
-              apt:
-                  packages:
-                      - libxml2-dev
-        - os: osx
-          language: generic
-          env: BUILDER=setup.py PYTHON=python3
-
-install: |
-    set -e
-    if [[ "$TRAVIS_OS_NAME" == 'osx' ]]; then
-        brew update;
-        brew upgrade python;
-        python3 --version
-        pip3 install --no-binary lxml chardet lxml beautifulsoup4
-    else
-        PLIB=$(ldd `which python` | grep libpython | cut -d ' ' -f 3)
-        export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`dirname $PLIB`
-        pip install --no-binary lxml chardet lxml beautifulsoup4
-        if [[ $TRAVIS_PYTHON_VERSION == 2.* ]]; then pip install 
BeautifulSoup; fi
-    fi
-    $PYTHON -c "from lxml import etree; print(etree)"
-    git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests
-    set +e
-
-script:
-    - $PYTHON $BUILDER test
-    - if [[ $BUILDER == "build.py" ]]; then $PYTHON $BUILDER leak; fi
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/README.rst 
new/html5-parser-0.4.10/README.rst
--- old/html5-parser-0.4.9/README.rst   2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/README.rst  2021-09-22 09:00:47.000000000 +0200
@@ -1,7 +1,7 @@
 html5-parser
 ================
 
-|pypi| |unix_build| |windows_build| |docs|
+|pypi| |build| |docs|
 
 A *fast*, standards compliant, C based, HTML 5 parser for python. Over 
**thirty**
 times as fast as pure python based parsers, such as html5lib.
@@ -12,13 +12,9 @@
     :target: https://pypi.python.org/pypi/html5-parser
     :alt: Latest version released on PyPi
 
-.. |unix_build| image:: https://api.travis-ci.org/kovidgoyal/html5-parser.svg
-    :target: http://travis-ci.org/kovidgoyal/html5-parser
-    :alt: Build status of the master branch on Unix
-
-.. |windows_build|  image:: 
https://ci.appveyor.com/api/projects/status/github/kovidgoyal/html5-parser?svg=true
-    :target: https://ci.appveyor.com/project/kovidgoyal/html5-parser
-    :alt: Build status of the master branch on Windows
+.. |build| image:: 
https://github.com/kovidgoyal/html5-parser/workflows/CI/badge.svg
+    :target: https://github.com/kovidgoyal/html5-parser/actions?query=workflow%3ACI"
+    :alt: Build status of the master branch
 
 .. |docs| image:: 
https://readthedocs.org/projects/html5-parser/badge/?version=latest
     :target: https://html5-parser.readthedocs.io/en/latest/
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/build.py 
new/html5-parser-0.4.10/build.py
--- old/html5-parser-0.4.9/build.py     2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/build.py    2021-09-22 09:00:47.000000000 +0200
@@ -25,7 +25,7 @@
 _plat = sys.platform.lower()
 isosx = 'darwin' in _plat
 iswindows = hasattr(sys, 'getwindowsversion')
-is_travis = os.environ.get('TRAVIS') == 'true'
+is_ci = os.environ.get('CI') == 'true'
 Env = namedtuple('Env', 'cc cflags ldflags linker debug cc_name cc_ver')
 PKGCONFIG = os.environ.get('PKGCONFIG_EXE', 'pkg-config')
 with open(os.path.join(base, 'src/python-wrapper.c'), 'rb') as f:
@@ -209,7 +209,7 @@
 
 TEST_EXE = os.path.join(build_dir, 'test')
 MEMLEAK_EXE = os.path.join(build_dir, 'mem-leak-check')
-if is_travis:
+if is_ci:
     TEST_EXE = os.path.join(os.path.dirname(os.path.abspath(sys.executable)), 
'test-html5-parser')
 SRC_DIRS = 'src gumbo'.split()
 MOD_EXT = '.so'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/gumbo/error.c 
new/html5-parser-0.4.10/gumbo/error.c
--- old/html5-parser-0.4.9/gumbo/error.c        2019-11-03 04:13:38.000000000 
+0100
+++ new/html5-parser-0.4.10/gumbo/error.c       2021-09-22 09:00:47.000000000 
+0200
@@ -78,8 +78,8 @@
     if (i) {
       print_message(output, ", ");
     }
-    GumboTag tag = (GumboTag) error->tag_stack.data[i];
-    print_message(output, gumbo_normalized_tagname(tag));
+    uintptr_t tag = (uintptr_t) error->tag_stack.data[i];
+    print_message(output, gumbo_normalized_tagname((GumboTag)tag));
   }
   gumbo_string_buffer_append_codepoint('.', output);
 }
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/gumbo/parser.c 
new/html5-parser-0.4.10/gumbo/parser.c
--- old/html5-parser-0.4.9/gumbo/parser.c       2019-11-03 04:13:38.000000000 
+0100
+++ new/html5-parser-0.4.10/gumbo/parser.c      2021-09-22 09:00:47.000000000 
+0200
@@ -645,7 +645,7 @@
   if (template_insertion_modes->length == 0) {
     return GUMBO_INSERTION_MODE_INITIAL;
   }
-  return (GumboInsertionMode)
+  return (GumboInsertionMode)(uintptr_t)
       template_insertion_modes->data[(template_insertion_modes->length - 1)];
 }
 
@@ -4344,27 +4344,23 @@
       (tag_is(token, kStartTag, GUMBO_TAG_FONT) &&
           (token_has_attribute(token, "color") ||
               token_has_attribute(token, "face") ||
-              token_has_attribute(token, "size")))) {
+              token_has_attribute(token, "size"))) ||
+      (tag_in(token, kEndTag, (gumbo_tagset){TAG(P), TAG(BR)}))
+    ) {
     /* Parse error */
     parser_add_parse_error(parser, token);
 
-    /*
-     * Fragment case: If the parser was originally created for the HTML
-     * fragment parsing algorithm, then act as described in the "any other
-     * start tag" entry below.
-     */
-    if (!is_fragment_parser(parser)) {
-      do {
-        pop_current_node(parser);
-      } while (!(is_mathml_integration_point(get_current_node(parser)) ||
-                 is_html_integration_point(get_current_node(parser)) ||
-                 get_current_node(parser)->v.element.tag_namespace ==
-                     GUMBO_NAMESPACE_HTML));
-      parser->_parser_state->_reprocess_current_token = true;
-      return false;
+    GumboNode *current_node;
+    while ((current_node = get_current_node(parser)) && !(
+                is_mathml_integration_point(current_node) ||
+                is_html_integration_point(current_node) ||
+                current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML
+    )) {
+        if (!pop_current_node(parser)) break;
     }
 
-    assert(token->type == GUMBO_TOKEN_START_TAG);
+    parser->_parser_state->_reprocess_current_token = true;
+    return false;
   }
 
   if (token->type == GUMBO_TOKEN_START_TAG) {
@@ -4647,7 +4643,7 @@
                     // we exclude the <html> tag as it causes crashes in the 
as-lxml
                     // module, see 
https://github.com/kovidgoyal/html5-parser/issues/17
                     // I dont have the time to track down the root cause, 
probably something
-                    // related to resuing the same string segments for the tag 
name and the
+                    // related to reusing the same string segments for the tag 
name and the
                     // special cloning/modification that happens to HTML tags. 
Since HTML tags
                     // are treated specially anyway, there is no harm in 
excluding them.
                     TAG(HTML)})) {
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/run_tests.py 
new/html5-parser-0.4.10/run_tests.py
--- old/html5-parser-0.4.9/run_tests.py 2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/run_tests.py        2021-09-22 09:00:47.000000000 
+0200
@@ -10,6 +10,12 @@
 import sys
 import unittest
 
+if 'HTML5_PARSER_DLL_DIR' in os.environ:
+    sys.save_dll_dir = os.add_dll_directory(os.environ['HTML5_PARSER_DLL_DIR'])
+    print('Added DLL directory', sys.save_dll_dir, 'with contents:',
+          os.listdir(os.environ['HTML5_PARSER_DLL_DIR']))
+    print('Current sys.path:', sys.path)
+
 self_path = os.path.abspath(__file__)
 base = os.path.dirname(self_path)
 html5lib_tests_path = os.path.join(base, 'test', 'html5lib-tests')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/src/as-libxml.c 
new/html5-parser-0.4.10/src/as-libxml.c
--- old/html5-parser-0.4.9/src/as-libxml.c      2019-11-03 04:13:38.000000000 
+0100
+++ new/html5-parser-0.4.10/src/as-libxml.c     2021-09-22 09:00:47.000000000 
+0200
@@ -215,7 +215,7 @@
 
     if (UNLIKELY(elem->tag >= GUMBO_TAG_UNKNOWN)) {
         gumbo_tag_from_original_text(&(elem->original_tag));
-        uint8_t tag_sz = MIN(sizeof(buf) - 1, elem->original_tag.length);
+        uint8_t tag_sz = (uint8_t)(MIN(sizeof(buf) - 1, 
elem->original_tag.length));
         memcpy(buf, elem->original_tag.data, tag_sz);
         tag = buf;
         if (pd->maybe_xhtml) {
@@ -223,7 +223,7 @@
             nsprefix = check_for_namespace_prefix(&temp, &tag_sz);
             tag = temp;
         }
-        tag_sz = pd->sanitize_names ? sanitize_name((char*)tag) : strlen(tag);
+        tag_sz = (uint8_t)(pd->sanitize_names ? sanitize_name((char*)tag) : 
strlen(tag));
         tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, tag_sz);
     } else if (UNLIKELY(elem->tag_namespace == GUMBO_NAMESPACE_SVG)) {
         gumbo_tag_from_original_text(&(elem->original_tag));
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html5-parser-0.4.9/src/html5_parser/__init__.py 
new/html5-parser-0.4.10/src/html5_parser/__init__.py
--- old/html5-parser-0.4.9/src/html5_parser/__init__.py 2019-11-03 
04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/src/html5_parser/__init__.py        2021-09-22 
09:00:47.000000000 +0200
@@ -115,7 +115,7 @@
     return {'lxml.etree': 'lxml', 'etree': 'stdlib_etree'}.get(x, x)
 
 
-NAMESPACE_SUPPORTING_BUILDERS = frozenset('lxml stdlib_etree dom'.split())
+NAMESPACE_SUPPORTING_BUILDERS = frozenset('lxml stdlib_etree dom 
lxml_html'.split())
 
 
 def parse(
@@ -129,7 +129,8 @@
     return_root=True,
     line_number_attr=None,
     sanitize_names=True,
-    stack_size=16 * 1024
+    stack_size=16 * 1024,
+    fragment_context=None,
 ):
     '''
     Parse the specified :attr:`html` and return the parsed representation.
@@ -145,7 +146,9 @@
     :param treebuilder:
         The type of tree to return. Note that only the lxml treebuilder is 
fast, as all
         other treebuilders are implemented in python, not C. Supported values 
are:
-          * `lxml <http://lxml.de>`_  -- the default, and fastest
+          * `lxml <https://lxml.de>`_  -- the default, and fastest
+          * `lxml_html <https://lxml.de>`_  -- tree of lxml.html.HtmlElement, 
same speed as lxml
+            (new in *0.4.10*)
           * etree (the python stdlib :mod:`xml.etree.ElementTree`)
           * dom (the python stdlib :mod:`xml.dom.minidom`)
           * `soup <https://www.crummy.com/software/BeautifulSoup>`_ -- 
BeautifulSoup,
@@ -161,7 +164,8 @@
         suitable for XHTML. In particular handles self-closed CDATA elements.
         So a ``<title/>`` or ``<style/>`` in the HTML will not completely break
         parsing. Also preserves namespaced tags and attributes even for 
namespaces
-        not supported by HTML 5 (this works only with the ``lxml`` 
treebuilder).
+        not supported by HTML 5 (this works only with the ``lxml`` and 
``lxml_html``
+        treebuilders).
         Note that setting this also implicitly sets ``namespace_elements``.
 
     :param return_root: If True, return the root node of the document, 
otherwise
@@ -181,6 +185,10 @@
         default is sufficient to avoid memory allocations for all but the
         largest documents.
 
+    :param fragment_context: the tag name under which to parse the HTML when 
the html
+        is a fragment. Common choices are ``div`` or ``body``. To use SVG or 
MATHML tags
+        prefix the tag name with ``svg:`` or ``math:`` respectively. Note that 
currently
+        using a non-HTML fragment_context is not supported. New in *0.4.10*.
     '''
     data = as_utf8(html or b'', transport_encoding, fallback_encoding)
     treebuilder = normalize_treebuilder(treebuilder)
@@ -190,6 +198,15 @@
             data, return_root=return_root, keep_doctype=keep_doctype, 
stack_size=stack_size)
     if treebuilder not in NAMESPACE_SUPPORTING_BUILDERS:
         namespace_elements = False
+    fragment_namespace = html_parser.GUMBO_NAMESPACE_HTML
+    if fragment_context:
+        fragment_context = fragment_context.lower()
+        if ':' in fragment_context:
+            ns, fragment_context = fragment_context.split(':', 1)
+            fragment_namespace = {
+                'svg': html_parser.GUMBO_NAMESPACE_SVG, 'math': 
html_parser.GUMBO_NAMESPACE_MATHML,
+                'html': html_parser.GUMBO_NAMESPACE_HTML
+            }[ns]
 
     capsule = html_parser.parse(
         data,
@@ -198,10 +215,17 @@
         maybe_xhtml=maybe_xhtml,
         line_number_attr=line_number_attr,
         sanitize_names=sanitize_names,
-        stack_size=stack_size)
-
-    ans = etree.adopt_external_document(capsule)
-    if treebuilder == 'lxml':
+        stack_size=stack_size,
+        fragment_context=fragment_context,
+        fragment_namespace=fragment_namespace,
+        )
+
+    interpreter = None
+    if treebuilder == 'lxml_html':
+        from lxml.html import HTMLParser
+        interpreter = HTMLParser()
+    ans = etree.adopt_external_document(capsule, parser=interpreter)
+    if treebuilder in ('lxml', 'lxml_html'):
         return ans.getroot() if return_root else ans
     m = importlib.import_module('html5_parser.' + treebuilder)
     return m.adapt(ans, return_root=return_root)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/src/python-wrapper.c new/html5-parser-0.4.10/src/python-wrapper.c
--- old/html5-parser-0.4.9/src/python-wrapper.c 2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/src/python-wrapper.c        2021-09-22 09:00:47.000000000 +0200
@@ -15,7 +15,7 @@
 
 #define MAJOR 0
 #define MINOR 4
-#define PATCH 9
+#define PATCH 10
 
 static char *NAME =  "libxml2:xmlDoc";
 static char *DESTRUCTOR = "destructor:xmlFreeDoc";
@@ -35,12 +35,12 @@
     return doc;
 }
 
-static inline libxml_doc*
-parse_with_options(const char* buffer, size_t buffer_length, Options *opts) {
+static libxml_doc*
+parse_with_options(const char* buffer, size_t buffer_length, Options *opts, const GumboTag context, GumboNamespaceEnum context_namespace) {
     GumboOutput *output = NULL;
     libxml_doc* doc = NULL;
     Py_BEGIN_ALLOW_THREADS;
-    output = gumbo_parse_with_options(&(opts->gumbo_opts), buffer, buffer_length);
+    output = gumbo_parse_fragment(&(opts->gumbo_opts), buffer, buffer_length, context, context_namespace);
     Py_END_ALLOW_THREADS;
     if (output == NULL) PyErr_NoMemory();
     else {
@@ -76,18 +76,33 @@
     Options opts = {0};
     opts.stack_size = 16 * 1024;
     PyObject *kd = Py_True, *mx = Py_False, *ne = Py_False, *sn = Py_True;
+    char *fragment_context = NULL; Py_ssize_t fragment_context_sz = 0;
     opts.gumbo_opts = kGumboDefaultOptions;
     opts.gumbo_opts.max_errors = 0;  // We discard errors since we are not reporting them anyway
+    GumboNamespaceEnum fragment_namespace = GUMBO_NAMESPACE_HTML;
 
-    static char *kwlist[] = {"data", "namespace_elements", "keep_doctype", "maybe_xhtml", "line_number_attr", "sanitize_names", "stack_size", NULL};
+    static char *kwlist[] = {"data", "namespace_elements", "keep_doctype", "maybe_xhtml", "line_number_attr", "sanitize_names", "stack_size", "fragment_context", "fragment_namespace", NULL};
 
-    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|OOOzOI", kwlist, &buffer, &sz, &ne, &kd, &mx, &(opts.line_number_attr), &sn, &(opts.stack_size))) return NULL;
+    if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|OOOzOIz#i", kwlist, &buffer, &sz, &ne, &kd, &mx, &(opts.line_number_attr), &sn, &(opts.stack_size), &fragment_context, &fragment_context_sz, &fragment_namespace)) return NULL;
     opts.namespace_elements = PyObject_IsTrue(ne);
     opts.keep_doctype = PyObject_IsTrue(kd);
     opts.sanitize_names = PyObject_IsTrue(sn);
     opts.gumbo_opts.use_xhtml_rules = PyObject_IsTrue(mx);
-
-    doc = parse_with_options(buffer, (size_t)sz, &opts);
+    GumboTag context = GUMBO_TAG_LAST;
+    if (fragment_context && fragment_context_sz > 0) {
+        context = gumbo_tagn_enum(fragment_context, fragment_context_sz);
+        if (context == GUMBO_TAG_UNKNOWN) {
+            PyErr_Format(PyExc_KeyError, "Unknown fragment_context tag name: %s", fragment_context);
+            return NULL;
+        }
+    }
+    if (fragment_namespace != GUMBO_NAMESPACE_HTML) {
+        // causes infinite loops in gumbo, enable the non html fragment context tests
+        // in html5lib_adapter.py to trigger
+        PyErr_SetString(PyExc_KeyError, "Fragment parsing with non-HTML namespaces is not supported");
+        return NULL;
+    }
+    doc = parse_with_options(buffer, (size_t)sz, &opts, context, fragment_namespace);
     if (!doc) return NULL;
     return encapsulate(doc);
 }
@@ -187,6 +202,9 @@
     if (PyModule_AddIntMacro(m, MAJOR) != 0) INITERROR;
     if (PyModule_AddIntMacro(m, MINOR) != 0) INITERROR;
     if (PyModule_AddIntMacro(m, PATCH) != 0) INITERROR;
+    if (PyModule_AddIntMacro(m, GUMBO_NAMESPACE_HTML) != 0) INITERROR;
+    if (PyModule_AddIntMacro(m, GUMBO_NAMESPACE_SVG) != 0) INITERROR;
+    if (PyModule_AddIntMacro(m, GUMBO_NAMESPACE_MATHML) != 0) INITERROR;
     if (PyModule_AddIntConstant(m, "LIBXML_VERSION", get_libxml_version()) != 0) INITERROR;
     known_tag_names = PyTuple_New(GUMBO_TAG_UNKNOWN);
     if (known_tag_names == NULL) INITERROR;
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/test/basic.py new/html5-parser-0.4.10/test/basic.py
--- old/html5-parser-0.4.9/test/basic.py        2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/test/basic.py       2021-09-22 09:00:47.000000000 +0200
@@ -94,3 +94,12 @@
         self.ae(root[1][0].sourceline, 4)
         self.ae(root[1][0][0].sourceline, 4)
         self.ae(root[1][0][0].get('ln'), '4')
+
+    def test_lxml_html(self):
+        root = parse('<html><head><body><p><span>', treebuilder='lxml_html')
+        from lxml.html import HtmlElement
+        self.assertIsInstance(root, HtmlElement)
+
+    def test_fragment(self):
+        root = parse('<span>a</span>', fragment_context='div')
+        self.ae(root[0].tag, 'span')
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/test/html5lib_adapter.py new/html5-parser-0.4.10/test/html5lib_adapter.py
--- old/html5-parser-0.4.9/test/html5lib_adapter.py     2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/test/html5lib_adapter.py    2021-09-22 09:00:47.000000000 +0200
@@ -65,10 +65,10 @@
         return {k: n(v) for k, v in data.items()}
 
 
-def serialize_construction_output(root):
+def serialize_construction_output(root, fragment_context):
     tree = root.getroottree()
     lines = []
-    if tree.docinfo.doctype:
+    if tree.docinfo.doctype and not fragment_context:
         di = tree.docinfo
         if di.public_id or di.system_url:
             d = '<!DOCTYPE {} "{}" "{}">'.format(di.root_name, di.public_id, di.system_url)
@@ -97,12 +97,11 @@
         level += 2
         add(level, ns, name, '=', '"', val, '"')
 
-    def serialize_text(text, level):
-        level += 2
-        add(level, '"', text, '"')
+    def serialize_text(text, level=0):
+        add((level + 2) if level else 1, '"', text, '"')
 
     def serialize_comment(node, level=1):
-        add(level, '<!-- ', node.text, ' -->')
+        add(level, '<!-- ', node.text or '', ' -->')
 
     def serialize_node(node, level=1):
         name = serialize_tag(node.tag, level)
@@ -121,11 +120,20 @@
             if child.tail:
                 serialize_text(child.tail, level)
 
-    for c in root.itersiblings(preceding=True):
-        serialize_comment(c)
-    serialize_node(root)
-    for c in root.itersiblings():
-        serialize_comment(c)
+    if fragment_context:
+        if root.text:
+            serialize_text(root.text)
+        for node in root.iterchildren():
+            if isinstance(node, _Comment):
+                serialize_comment(node)
+            else:
+                serialize_node(node)
+    else:
+        for c in root.itersiblings(preceding=True):
+            serialize_comment(c)
+        serialize_node(root)
+        for c in root.itersiblings():
+            serialize_comment(c)
     output = '\n'.join(lines)
     # gumbo does not fix single carriage returns generated by entities and it
     # does not lowercase unknown tags
@@ -159,7 +167,7 @@
 class ConstructionTests(BaseTest):
 
     @classmethod
-    def check_test(cls, inner_html, html, expected, errors, test_name):
+    def check_test(cls, fragment_context, html, expected, errors, test_name):
         if test_name == 'isindex' or html == '<!doctype html><isindex type="hidden">':
             return (
                 'gumbo and html5lib differ on <isindex> parsing'
@@ -176,17 +184,19 @@
         for line in errors:
             if 'expected-doctype-name-but' in line or 'unknown-doctype' in line:
                 return 'gumbo auto-corrects malformed doctypes'
-        if inner_html:
-            return 'TODO: Implement fragment parsing'
+        if fragment_context and ':' in fragment_context:
+            return 'Fragment parsing with non HTML contexts not supported'
 
-    def implementation(self, inner_html, html, expected, errors, test_name):
-        html = inner_html or html
-        bad = self.check_test(inner_html, html, expected, errors, test_name)
+    def implementation(self, fragment_context, html, expected, errors, test_name):
+        if fragment_context:
+            fragment_context = fragment_context.replace(' ', ':')
+        bad = self.check_test(fragment_context, html, expected, errors, test_name)
         if bad is not None:
             raise unittest.SkipTest(bad)
 
-        root = parse(html, namespace_elements=True, sanitize_names=False)
-        output = serialize_construction_output(root)
+        root = parse(
+            html, namespace_elements=True, sanitize_names=False, fragment_context=fragment_context)
+        output = serialize_construction_output(root, fragment_context=fragment_context)
 
         # html5lib doesn't yet support the template tag, but it appears in the
         # tests with the expectation that the template contents will be under the
@@ -200,7 +210,7 @@
 
 class EncodingTests(BaseTest):
 
-    def implementation(self, inner_html, html, expected, errors, test_name):
+    def implementation(self, fragment_context, html, expected, errors, test_name):
         if '<!-- Starts with UTF-8 BOM -->' in html:
             raw = b'\xef\xbb\xbf' + html[3:].encode('ascii')
             self.assertIs(check_bom(raw), codecs.BOM_UTF8)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/win-ci.py new/html5-parser-0.4.10/win-ci.py
--- old/html5-parser-0.4.9/win-ci.py    2019-11-03 04:13:38.000000000 +0100
+++ new/html5-parser-0.4.10/win-ci.py   1970-01-01 01:00:00.000000000 +0100
@@ -1,255 +0,0 @@
-#!/usr/bin/env python3
-# vim:fileencoding=utf-8
-# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net>
-
-from __future__ import print_function
-
-import errno
-import glob
-import io
-import os
-import pipes
-import shlex
-import shutil
-import subprocess
-import sys
-import tarfile
-import time
-
-ZLIB = "http://zlib.net/zlib-{}.tar.xz".format("1.2.11")
-LIBXML2 = "ftp://xmlsoft.org/libxml2/libxml2-{}.tar.gz".format('2.9.4')
-LIBXSLT = "ftp://xmlsoft.org/libxml2/libxslt-{}.tar.gz".format('1.1.28')
-LXML = "https://pypi.python.org/packages/20/b3/9f245de14b7696e2d2a386c0b09032a2ff6625270761d6543827e667d8de/lxml-3.8.0.tar.gz"  # noqa
-SW = os.path.abspath('sw')
-if 'PY' in os.environ and 'Platform' in os.environ:
-    PYTHON = os.path.expandvars('C:\\Python%PY%-%Platform%\\python.exe').replace('-x86', '')
-else:
-    PYTHON = sys.executable
-os.environ['SW'] = SW
-os.environ['PYTHONPATH'] = os.path.expandvars('%SW%\\python\\Lib\\site-packages;%PYTHONPATH%')
-
-
-def printf(*a, **k):
-    print(*a, **k)
-    sys.stdout.flush()
-
-
-def walk(path='.'):
-    for dirpath, dirnames, filenames in os.walk(path):
-        for f in filenames:
-            yield os.path.join(dirpath, f)
-
-
-def download_file(url):
-    for i in range(5):
-        try:
-            printf('Downloading', url)
-            try:
-                return subprocess.check_output(['curl.exe', '-fSL', url])
-            except FileNotFoundError:
-                try:
-                    from urllib.request import urlopen
-                except ImportError:
-                    from urllib import urlopen
-                return urlopen(url).read()
-        except subprocess.CalledProcessError:
-            time.sleep(1)
-    raise SystemExit('Failed to download: {}'.format(url))
-
-
-def split(x):
-    x = x.replace('\\', '\\\\')
-    return shlex.split(x)
-
-
-def run(*args, env=None, cwd=None):
-    if len(args) == 1 and isinstance(args[0], type('')):
-        cmd = split(args[0])
-    else:
-        cmd = args
-    printf(' '.join(pipes.quote(x) for x in cmd))
-    sys.stdout.flush()
-    if env:
-        printf('Using modified env:', env)
-        e = os.environ.copy()
-        e.update(env)
-        env = e
-    try:
-        p = subprocess.Popen(cmd, cwd=cwd, env=env)
-    except EnvironmentError as err:
-        if err.errno == errno.ENOENT:
-            raise SystemExit('Could not find the program: %s' % cmd[0])
-        raise
-    if p.wait() != 0:
-        raise SystemExit(p.returncode)
-
-
-def download_and_extract(url):
-    raw = io.BytesIO(download_file(url))
-    with tarfile.open(fileobj=raw, mode='r:*') as f:
-        f.extractall()
-    for x in os.listdir('.'):
-        if os.path.isdir(x):
-            os.chdir(x)
-            return
-
-
-def ensure_dir(path):
-    try:
-        os.makedirs(path)
-    except EnvironmentError as err:
-        if err.errno != errno.EEXIST:
-            raise
-
-
-def replace_in_file(path, old, new, missing_ok=False):
-    if isinstance(old, type('')):
-        old = old.encode('utf-8')
-    if isinstance(new, type('')):
-        new = new.encode('utf-8')
-    with open(path, 'r+b') as f:
-        raw = f.read()
-        if isinstance(old, bytes):
-            nraw = raw.replace(old, new)
-        else:
-            nraw = old.sub(new, raw)
-        if raw == nraw and not missing_ok:
-            raise ValueError('Failed (pattern not found) to patch: ' + path)
-        f.seek(0), f.truncate()
-        f.write(nraw)
-
-
-def copy_headers(pattern, destdir='include'):
-    dest = os.path.join(SW, destdir)
-    ensure_dir(dest)
-    files = glob.glob(pattern)
-    for f in files:
-        dst = os.path.join(dest, os.path.basename(f))
-        if os.path.isdir(f):
-            shutil.copytree(f, dst)
-        else:
-            shutil.copy2(f, dst)
-
-
-def install_binaries(pattern, destdir='lib', fname_map=os.path.basename):
-    dest = os.path.join(SW, destdir)
-    ensure_dir(dest)
-    files = glob.glob(pattern)
-    files.sort(key=len, reverse=True)
-    if not files:
-        raise ValueError('The pattern %s did not match any actual files' % pattern)
-    for f in files:
-        dst = os.path.join(dest, fname_map(f))
-        shutil.copy(f, dst)
-        os.chmod(dst, 0o755)
-        if os.path.exists(f + '.manifest'):
-            shutil.copy(f + '.manifest', dst + '.manifest')
-
-
-def install_tree(src, dest_parent='include', ignore=None):
-    dest_parent = os.path.join(SW, dest_parent)
-    dst = os.path.join(dest_parent, os.path.basename(src))
-    if os.path.exists(dst):
-        shutil.rmtree(dst)
-    shutil.copytree(src, dst, symlinks=True, ignore=ignore)
-    return dst
-
-
-def pure_python():
-    run(PYTHON, '-m', 'pip', 'install', 'chardet', 'bs4', '--prefix', os.path.join(SW, 'python'))
-    run(PYTHON, '-c', 'import bs4; print(bs4)')
-
-
-def zlib():
-    run('nmake -f win32/Makefile.msc')
-    install_binaries('zlib1.dll*', 'bin')
-    install_binaries('zlib.lib'), install_binaries('zdll.*')
-    copy_headers('zconf.h'), copy_headers('zlib.h')
-
-
-def libxml2():
-    run(
-        *(
-            'cscript.exe configure.js include={0}/include lib={0}/lib prefix={0} zlib=yes iconv=no'.
-            format(SW.replace(os.sep, '/')).split()),
-        cwd='win32')
-    run('nmake /f Makefile.msvc', cwd='win32')
-    install_tree('include/libxml', 'include/libxml2')
-    for f in walk('.'):
-        if f.endswith('.dll'):
-            install_binaries(f, 'bin')
-        elif f.endswith('.lib'):
-            install_binaries(f)
-
-
-def libxslt():
-    run(
-        *(
-            'cscript.exe configure.js include={0}/include include={0}/include/libxml2 lib={0}/lib '
-            'prefix={0} zlib=yes iconv=no'.format(SW.replace(os.sep, '/')).split()),
-        cwd='win32')
-    replace_in_file('libxslt/win32config.h', '#define snprintf _snprintf', '')
-    for f in walk('.'):
-        if os.path.basename(f).startswith('Makefile'):
-            replace_in_file(f, '/OPT:NOWIN98', '', missing_ok=True)
-    run('nmake /f Makefile.msvc', cwd='win32')
-    install_tree('libxslt', 'include')
-    install_tree('libexslt', 'include')
-    for f in walk('.'):
-        if f.endswith('.dll'):
-            install_binaries(f, 'bin')
-        elif f.endswith('.lib'):
-            install_binaries(f)
-
-
-def lxml():
-    replace_in_file('setupinfo.py', ", 'iconv'", '')
-    run(
-        PYTHON,
-        *(
-            'setup.py build_ext -I {0}/include;{0}/include/libxml2 -L {0}/lib'.format(
-                SW.replace(os.sep, '/')).split()))
-    run(PYTHON, 'setup.py', 'install', '--prefix', os.path.join(SW, 'python'))
-
-
-def install_deps():
-    print(PYTHON)
-    for x in 'build lib bin include python/Lib/site-packages'.split():
-        ensure_dir(os.path.join(SW, x))
-    os.chdir(os.path.join(SW, 'build'))
-    base = os.getcwd()
-    pure_python()
-    for name in 'zlib libxml2 libxslt lxml'.split():
-        os.chdir(base)
-        if os.path.exists(name):
-            continue
-        os.mkdir(name), os.chdir(name)
-        try:
-            download_and_extract(globals()[name.upper()])
-            globals()[name]()
-        except:
-            os.chdir(base)
-            shutil.rmtree(name)
-            raise
-
-
-def build():
-    p = os.environ['PATH']
-    p = os.path.join(SW, 'bin') + os.pathsep + p
-    env = dict(
-        LIBXML_INCLUDE_DIRS=r'{0}\include;{0}\include\libxml2'.format(SW),
-        LIBXML_LIB_DIRS=r'{0}\lib'.format(SW),
-        PATH=p
-    )
-    run(PYTHON, 'setup.py', 'test', env=env)
-
-
-def main():
-    if sys.argv[-1] == 'install_deps':
-        install_deps()
-    else:
-        build()
-
-
-if __name__ == '__main__':
-    main()

commit python-html5-parser for openSUSE:Factory

Reply via email to