Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-html5-parser for openSUSE:Factory checked in at 2021-10-15 23:04:10 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-html5-parser (Old) and /work/SRC/openSUSE:Factory/.python-html5-parser.new.1890 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-html5-parser" Fri Oct 15 23:04:10 2021 rev:11 rq:925386 version:0.4.10 Changes: -------- --- /work/SRC/openSUSE:Factory/python-html5-parser/python-html5-parser.changes 2021-06-02 22:12:28.168119085 +0200 +++ /work/SRC/openSUSE:Factory/.python-html5-parser.new.1890/python-html5-parser.changes 2021-10-15 23:04:45.358131358 +0200 @@ -1,0 +2,8 @@ +Fri Oct 15 08:27:05 UTC 2021 - ecsos <ec...@opensuse.org> + +- Update to 0.4.10 + No changelog from upstream. + See instead here: + https://github.com/kovidgoyal/html5-parser/compare/v0.4.9...v0.4.10?diff=unified&name=v0.4.10 + +------------------------------------------------------------------- Old: ---- python-html5-parser-0.4.9.tar.gz New: ---- python-html5-parser-0.4.10.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-html5-parser.spec ++++++ --- /var/tmp/diff_new_pack.TAqKae/_old 2021-10-15 23:04:45.798131672 +0200 +++ /var/tmp/diff_new_pack.TAqKae/_new 2021-10-15 23:04:45.802131674 +0200 @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-html5-parser -Version: 0.4.9 +Version: 0.4.10 Release: 0 Summary: C based HTML 5 parsing for Python License: Apache-2.0 @@ -26,6 +26,7 @@ URL: https://github.com/kovidgoyal/html5-parser Source: https://github.com/kovidgoyal/html5-parser/archive/v%{version}/%{name}-%{version}.tar.gz BuildRequires: %{python_module beautifulsoup4} +BuildRequires: %{python_module chardet} BuildRequires: %{python_module devel} BuildRequires: %{python_module lxml >= 3.8.0} BuildRequires: %{python_module setuptools} ++++++ python-html5-parser-0.4.9.tar.gz -> python-html5-parser-0.4.10.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/.appveyor.yml new/html5-parser-0.4.10/.appveyor.yml --- old/html5-parser-0.4.9/.appveyor.yml 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/.appveyor.yml 1970-01-01 01:00:00.000000000 +0100 @@ -1,27 +0,0 @@ -os: Visual Studio 2015 - -platform: - - x64 - - x86 - -cache: - - sw -> win-ci.py - -environment: - matrix: - - PY: 36 - - -build_script: - - ps: | - If ($env:Platform -Match "x86") { - $env:VCVARS_PLATFORM="x86" - } Else { - $env:VCVARS_PLATFORM="amd64" - } - - call "%VS140COMNTOOLS%\..\..\VC\vcvarsall.bat" %VCVARS_PLATFORM% - - C:/Python36-x64/python.exe win-ci.py install_deps - - git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests - -test_script: - - C:/Python36-x64/python.exe win-ci.py test diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/.github/workflows/ci.py new/html5-parser-0.4.10/.github/workflows/ci.py --- old/html5-parser-0.4.9/.github/workflows/ci.py 1970-01-01 01:00:00.000000000 +0100 +++ new/html5-parser-0.4.10/.github/workflows/ci.py 2021-09-22 09:00:47.000000000 +0200 @@ -0,0 +1,55 @@ +#!/usr/bin/env python +# vim:fileencoding=utf-8 +# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net> + +from __future__ import absolute_import, division, print_function, unicode_literals + +import os +import shlex +import subprocess +import sys + + +is_macos = 'darwin' in sys.platform.lower() + + +def run(*a): + if len(a) == 1: + a = shlex.split(a[0]) + ret = subprocess.Popen(a).wait() + if ret != 0: + print('Running:', a, 'failed', file=sys.stderr) + raise SystemExit(ret) + + +def install_deps(): + if is_macos: + pass + else: + run('sudo apt-get update') + run('sudo apt-get install -y libxml2-dev libxslt-dev') + deps = 'chardet lxml beautifulsoup4'.split() + if sys.version_info.major == 2: + deps.append('BeautifulSoup') + run(sys.executable, '-m', 'pip', 'install', '--no-binary', 'lxml', *deps) + run(sys.executable, '-c', 'from lxml import etree; print(etree)') + + +def main(): + which = sys.argv[-1] + if hasattr(sys, 'getwindowsversion'): + run(sys.executable, os.path.join(os.path.dirname(__file__), 'win-ci.py'), which) + return + if which == 'install': + install_deps() + elif which == 'test': + builder = os.environ['BUILDER'] + run(sys.executable, builder, 'test') + if builder == 'build.py': + run(sys.executable, builder, 'leak') + else: + raise SystemExit('Unknown action:', which) + + +if __name__ == '__main__': + main() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/.github/workflows/ci.yml new/html5-parser-0.4.10/.github/workflows/ci.yml --- old/html5-parser-0.4.9/.github/workflows/ci.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/html5-parser-0.4.10/.github/workflows/ci.yml 2021-09-22 09:00:47.000000000 +0200 @@ -0,0 +1,49 @@ +name: CI +on: [push, pull_request] +env: + CI: 'true' + LC_ALL: en_US.UTF-8 + LANG: en_US.UTF-8 + +jobs: + test: + name: Test on ${{ matrix.os }} (python=${{ matrix.pyver }} cc=${{ matrix.cc }} builder=${{ matrix.builder }}) + runs-on: ${{ matrix.os }} + env: + CC: ${{ matrix.cc }} + BUILDER: ${{ matrix.builder }} + strategy: + matrix: + include: + - { pyver: 2.7, builder: build.py, os: ubuntu-latest, cc: gcc } + - { pyver: 2.7, builder: build.py, os: ubuntu-latest, cc: clang } + - { pyver: 3.6, builder: build.py, os: ubuntu-latest, cc: gcc } + - { pyver: 3.6, builder: build.py, os: ubuntu-latest, cc: clang } + - { pyver: 3.8, builder: setup.py, os: ubuntu-latest, cc: gcc } + + - { pyver: 3.8, builder: setup.py, os: macos-latest, cc: clang } + + - { pyver: 3.8, builder: setup.py, os: windows-latest, cc: cl } + + steps: + - name: Checkout source code + uses: actions/checkout@master + with: + fetch-depth: 10 + + - name: Set up Python ${{ matrix.pyver }} + uses: actions/setup-python@master + with: + python-version: ${{ matrix.pyver }} + + - name: Install dependencies + run: + python .github/workflows/ci.py install + + - name: Download html5lib tests + run: + git clone --depth 1 https://github.com/html5lib/html5lib-tests.git test/html5lib-tests + + - name: Run tests + run: + python .github/workflows/ci.py test diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/.github/workflows/win-ci.py new/html5-parser-0.4.10/.github/workflows/win-ci.py --- old/html5-parser-0.4.9/.github/workflows/win-ci.py 1970-01-01 01:00:00.000000000 +0100 +++ new/html5-parser-0.4.10/.github/workflows/win-ci.py 2021-09-22 09:00:47.000000000 +0200 @@ -0,0 +1,307 @@ +#!/usr/bin/env python3 +# vim:fileencoding=utf-8 +# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net> + +from __future__ import print_function + +import errno +import glob +import io +import os +import pipes +import shlex +import shutil +import subprocess +import sys +import tarfile +import time + +ZLIB = "http://zlib.net/zlib-{}.tar.xz".format("1.2.11") +LIBXML2 = "ftp://xmlsoft.org/libxml2/libxml2-{}.tar.gz".format('2.9.4') +LIBXSLT = "ftp://xmlsoft.org/libxml2/libxslt-{}.tar.gz".format('1.1.28') +LXML = "https://files.pythonhosted.org/packages/c5/2f/a0d8aa3eee6d53d5723d89e1fc32eee11e76801b424e30b55c7aa6302b01/lxml-4.6.1.tar.gz" # noqa +SW = os.path.abspath('sw') +PYTHON = os.path.abspath(sys.executable) +os.environ['SW'] = SW +os.environ['PYTHONPATH'] = os.path.join(SW, r'python\Lib\site-packages') +plat = 'amd64' if sys.maxsize > 2**32 else 'x86' + + +def printf(*a, **k): + print(*a, **k) + sys.stdout.flush() + + +def walk(path='.'): + for dirpath, dirnames, filenames in os.walk(path): + for f in filenames: + yield os.path.join(dirpath, f) + + +def download_file(url): + for i in range(5): + try: + printf('Downloading', url) + try: + return subprocess.check_output(['curl.exe', '-fSL', url]) + except FileNotFoundError: + try: + from urllib.request import urlopen + except ImportError: + from urllib import urlopen + return urlopen(url).read() + except subprocess.CalledProcessError: + time.sleep(1) + raise SystemExit('Failed to download: {}'.format(url)) + + +def split(x): + x = x.replace('\\', '\\\\') + return shlex.split(x) + + +def run(*args, env=None, cwd=None): + if len(args) == 1 and isinstance(args[0], type('')): + cmd = split(args[0]) + else: + cmd = args + printf(' '.join(pipes.quote(x) for x in cmd)) + sys.stdout.flush() + if env: + printf('Using modified env:', env) + e = os.environ.copy() + e.update(env) + env = e + try: + p = subprocess.Popen(cmd, cwd=cwd, env=env) + except EnvironmentError as err: + if err.errno == errno.ENOENT: + raise SystemExit('Could not find the program: %s' % cmd[0]) + raise + if p.wait() != 0: + raise SystemExit(p.returncode) + + +def distutils_vcvars(): + from distutils.msvc9compiler import find_vcvarsall, get_build_version + return find_vcvarsall(get_build_version()) + + +def remove_dups(variable): + old_list = variable.split(os.pathsep) + new_list = [] + for i in old_list: + if i not in new_list: + new_list.append(i) + return os.pathsep.join(new_list) + + +def query_process(cmd): + if plat == 'amd64' and 'PROGRAMFILES(x86)' not in os.environ: + os.environ['PROGRAMFILES(x86)'] = os.environ['PROGRAMFILES'] + ' (x86)' + result = {} + popen = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + try: + stdout, stderr = popen.communicate() + if popen.wait() != 0: + raise RuntimeError(stderr.decode("mbcs")) + + stdout = stdout.decode("mbcs") + for line in stdout.splitlines(): + if '=' not in line: + continue + line = line.strip() + key, value = line.split('=', 1) + key = key.lower() + if key == 'path': + if value.endswith(os.pathsep): + value = value[:-1] + value = remove_dups(value) + result[key] = value + + finally: + popen.stdout.close() + popen.stderr.close() + return result + + +def query_vcvarsall(): + vcvarsall = distutils_vcvars() + return query_process('"%s" %s & set' % (vcvarsall, plat)) + + +def download_and_extract(url): + raw = io.BytesIO(download_file(url)) + with tarfile.open(fileobj=raw, mode='r:*') as f: + f.extractall() + for x in os.listdir('.'): + if os.path.isdir(x): + os.chdir(x) + return + + +def ensure_dir(path): + try: + os.makedirs(path) + except EnvironmentError as err: + if err.errno != errno.EEXIST: + raise + + +def replace_in_file(path, old, new, missing_ok=False): + if isinstance(old, type('')): + old = old.encode('utf-8') + if isinstance(new, type('')): + new = new.encode('utf-8') + with open(path, 'r+b') as f: + raw = f.read() + if isinstance(old, bytes): + nraw = raw.replace(old, new) + else: + nraw = old.sub(new, raw) + if raw == nraw and not missing_ok: + raise ValueError('Failed (pattern not found) to patch: ' + path) + f.seek(0), f.truncate() + f.write(nraw) + + +def copy_headers(pattern, destdir='include'): + dest = os.path.join(SW, destdir) + ensure_dir(dest) + files = glob.glob(pattern) + for f in files: + dst = os.path.join(dest, os.path.basename(f)) + if os.path.isdir(f): + shutil.copytree(f, dst) + else: + shutil.copy2(f, dst) + + +def install_binaries(pattern, destdir='lib', fname_map=os.path.basename): + dest = os.path.join(SW, destdir) + ensure_dir(dest) + files = glob.glob(pattern) + files.sort(key=len, reverse=True) + if not files: + raise ValueError('The pattern %s did not match any actual files' % pattern) + for f in files: + dst = os.path.join(dest, fname_map(f)) + shutil.copy(f, dst) + os.chmod(dst, 0o755) + if os.path.exists(f + '.manifest'): + shutil.copy(f + '.manifest', dst + '.manifest') + + +def install_tree(src, dest_parent='include', ignore=None): + dest_parent = os.path.join(SW, dest_parent) + dst = os.path.join(dest_parent, os.path.basename(src)) + if os.path.exists(dst): + shutil.rmtree(dst) + shutil.copytree(src, dst, symlinks=True, ignore=ignore) + return dst + + +def pure_python(): + run(PYTHON, '-m', 'pip', 'install', 'chardet', 'bs4', '--prefix', os.path.join(SW, 'python')) + run(PYTHON, '-c', 'import bs4; print(bs4)') + + +def zlib(): + run('nmake -f win32/Makefile.msc') + install_binaries('zlib1.dll*', 'bin') + install_binaries('zlib.lib'), install_binaries('zdll.*') + copy_headers('zconf.h'), copy_headers('zlib.h') + + +def libxml2(): + run( + *( + 'cscript.exe configure.js include={0}/include lib={0}/lib prefix={0} zlib=yes iconv=no'. + format(SW.replace(os.sep, '/')).split()), + cwd='win32') + run('nmake /f Makefile.msvc', cwd='win32') + install_tree('include/libxml', 'include/libxml2') + for f in walk('.'): + if f.endswith('.dll'): + install_binaries(f, 'bin') + elif f.endswith('.lib'): + install_binaries(f) + + +def libxslt(): + run( + *( + 'cscript.exe configure.js include={0}/include include={0}/include/libxml2 lib={0}/lib ' + 'prefix={0} zlib=yes iconv=no'.format(SW.replace(os.sep, '/')).split()), + cwd='win32') + replace_in_file('libxslt/win32config.h', '#define snprintf _snprintf', '') + for f in walk('.'): + if os.path.basename(f).startswith('Makefile'): + replace_in_file(f, '/OPT:NOWIN98', '', missing_ok=True) + run('nmake /f Makefile.msvc', cwd='win32') + install_tree('libxslt', 'include') + install_tree('libexslt', 'include') + for f in walk('.'): + if f.endswith('.dll'): + install_binaries(f, 'bin') + elif f.endswith('.lib'): + install_binaries(f) + + +def lxml(): + replace_in_file('setupinfo.py', ", 'iconv'", '') + run( + PYTHON, + *( + 'setup.py build_ext -I {0}/include;{0}/include/libxml2 -L {0}/lib'.format( + SW.replace(os.sep, '/')).split())) + run(PYTHON, 'setup.py', 'install', '--prefix', os.path.join(SW, 'python')) + package = glob.glob(os.path.join(SW, 'python', 'lib', 'site-packages', 'lxml-*.egg', 'lxml'))[0] + os.rename(package, os.path.join(SW, 'python', 'lib', 'site-packages', 'lxml')) + + +def install_deps(): + env = query_vcvarsall() + os.environ.update(env) + print(PYTHON) + for x in 'build lib bin include python/Lib/site-packages'.split(): + ensure_dir(os.path.join(SW, x)) + os.chdir(os.path.join(SW, 'build')) + base = os.getcwd() + pure_python() + for name in 'zlib libxml2 libxslt lxml'.split(): + os.chdir(base) + if os.path.exists(name): + continue + os.mkdir(name), os.chdir(name) + try: + download_and_extract(globals()[name.upper()]) + globals()[name]() + except Exception: + os.chdir(base) + shutil.rmtree(name) + raise + + +def build(): + env = query_vcvarsall() + os.environ.update(env) + os.environ.update(dict( + LIBXML_INCLUDE_DIRS=r'{0}\include;{0}\include\libxml2'.format(SW), + LIBXML_LIB_DIRS=r'{0}\lib'.format(SW), + HTML5_PARSER_DLL_DIR=os.path.join(SW, 'bin'), + )) + print('Using PYTHONPATH:', os.environ['PYTHONPATH']) + run(PYTHON, 'setup.py', 'test') + + +def main(): + if sys.argv[-1] == 'install': + install_deps() + else: + build() + + +if __name__ == '__main__': + main() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/.travis.yml new/html5-parser-0.4.10/.travis.yml --- old/html5-parser-0.4.9/.travis.yml 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/.travis.yml 1970-01-01 01:00:00.000000000 +0100 @@ -1,75 +0,0 @@ -env: - global: - - PYTHONHASHSEED=random - -matrix: - include: - - os: linux - language: python - python: 2.7 - env: BUILDER=build.py CC=gcc PYTHON=python - group: beta - dist: trusty - sudo: false - addons: - apt: - packages: - - libxml2-dev - - os: linux - language: python - python: 2.7 - env: BUILDER=build.py CC=clang PYTHON=python LSAN_OPTIONS=verbosity=1:log_threads=1 - group: beta - dist: trusty - # See https://github.com/travis-ci/travis-ci/issues/9033 - sudo: required - addons: - apt: - packages: - - libxml2-dev - - os: linux - language: python - python: 2.7 - env: BUILDER=setup.py PYTHON=python - group: beta - dist: trusty - sudo: false - addons: - apt: - packages: - - libxml2-dev - - os: linux - language: python - python: 3.6 - env: BUILDER=setup.py PYTHON=python - group: beta - dist: trusty - sudo: false - addons: - apt: - packages: - - libxml2-dev - - os: osx - language: generic - env: BUILDER=setup.py PYTHON=python3 - -install: | - set -e - if [[ "$TRAVIS_OS_NAME" == 'osx' ]]; then - brew update; - brew upgrade python; - python3 --version - pip3 install --no-binary lxml chardet lxml beautifulsoup4 - else - PLIB=$(ldd `which python` | grep libpython | cut -d ' ' -f 3) - export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:`dirname $PLIB` - pip install --no-binary lxml chardet lxml beautifulsoup4 - if [[ $TRAVIS_PYTHON_VERSION == 2.* ]]; then pip install BeautifulSoup; fi - fi - $PYTHON -c "from lxml import etree; print(etree)" - git clone --depth 1 "https://github.com/html5lib/html5lib-tests.git" test/html5lib-tests - set +e - -script: - - $PYTHON $BUILDER test - - if [[ $BUILDER == "build.py" ]]; then $PYTHON $BUILDER leak; fi diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/README.rst new/html5-parser-0.4.10/README.rst --- old/html5-parser-0.4.9/README.rst 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/README.rst 2021-09-22 09:00:47.000000000 +0200 @@ -1,7 +1,7 @@ html5-parser ================ -|pypi| |unix_build| |windows_build| |docs| +|pypi| |build| |docs| A *fast*, standards compliant, C based, HTML 5 parser for python. Over **thirty** times as fast as pure python based parsers, such as html5lib. @@ -12,13 +12,9 @@ :target: https://pypi.python.org/pypi/html5-parser :alt: Latest version released on PyPi -.. |unix_build| image:: https://api.travis-ci.org/kovidgoyal/html5-parser.svg - :target: http://travis-ci.org/kovidgoyal/html5-parser - :alt: Build status of the master branch on Unix - -.. |windows_build| image:: https://ci.appveyor.com/api/projects/status/github/kovidgoyal/html5-parser?svg=true - :target: https://ci.appveyor.com/project/kovidgoyal/html5-parser - :alt: Build status of the master branch on Windows +.. |build| image:: https://github.com/kovidgoyal/html5-parser/workflows/CI/badge.svg + :target: https://github.com/kovidgoyal/html5-parser/actions?query=workflow%3ACI" + :alt: Build status of the master branch .. |docs| image:: https://readthedocs.org/projects/html5-parser/badge/?version=latest :target: https://html5-parser.readthedocs.io/en/latest/ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/build.py new/html5-parser-0.4.10/build.py --- old/html5-parser-0.4.9/build.py 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/build.py 2021-09-22 09:00:47.000000000 +0200 @@ -25,7 +25,7 @@ _plat = sys.platform.lower() isosx = 'darwin' in _plat iswindows = hasattr(sys, 'getwindowsversion') -is_travis = os.environ.get('TRAVIS') == 'true' +is_ci = os.environ.get('CI') == 'true' Env = namedtuple('Env', 'cc cflags ldflags linker debug cc_name cc_ver') PKGCONFIG = os.environ.get('PKGCONFIG_EXE', 'pkg-config') with open(os.path.join(base, 'src/python-wrapper.c'), 'rb') as f: @@ -209,7 +209,7 @@ TEST_EXE = os.path.join(build_dir, 'test') MEMLEAK_EXE = os.path.join(build_dir, 'mem-leak-check') -if is_travis: +if is_ci: TEST_EXE = os.path.join(os.path.dirname(os.path.abspath(sys.executable)), 'test-html5-parser') SRC_DIRS = 'src gumbo'.split() MOD_EXT = '.so' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/gumbo/error.c new/html5-parser-0.4.10/gumbo/error.c --- old/html5-parser-0.4.9/gumbo/error.c 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/gumbo/error.c 2021-09-22 09:00:47.000000000 +0200 @@ -78,8 +78,8 @@ if (i) { print_message(output, ", "); } - GumboTag tag = (GumboTag) error->tag_stack.data[i]; - print_message(output, gumbo_normalized_tagname(tag)); + uintptr_t tag = (uintptr_t) error->tag_stack.data[i]; + print_message(output, gumbo_normalized_tagname((GumboTag)tag)); } gumbo_string_buffer_append_codepoint('.', output); } diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/gumbo/parser.c new/html5-parser-0.4.10/gumbo/parser.c --- old/html5-parser-0.4.9/gumbo/parser.c 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/gumbo/parser.c 2021-09-22 09:00:47.000000000 +0200 @@ -645,7 +645,7 @@ if (template_insertion_modes->length == 0) { return GUMBO_INSERTION_MODE_INITIAL; } - return (GumboInsertionMode) + return (GumboInsertionMode)(uintptr_t) template_insertion_modes->data[(template_insertion_modes->length - 1)]; } @@ -4344,27 +4344,23 @@ (tag_is(token, kStartTag, GUMBO_TAG_FONT) && (token_has_attribute(token, "color") || token_has_attribute(token, "face") || - token_has_attribute(token, "size")))) { + token_has_attribute(token, "size"))) || + (tag_in(token, kEndTag, (gumbo_tagset){TAG(P), TAG(BR)})) + ) { /* Parse error */ parser_add_parse_error(parser, token); - /* - * Fragment case: If the parser was originally created for the HTML - * fragment parsing algorithm, then act as described in the "any other - * start tag" entry below. - */ - if (!is_fragment_parser(parser)) { - do { - pop_current_node(parser); - } while (!(is_mathml_integration_point(get_current_node(parser)) || - is_html_integration_point(get_current_node(parser)) || - get_current_node(parser)->v.element.tag_namespace == - GUMBO_NAMESPACE_HTML)); - parser->_parser_state->_reprocess_current_token = true; - return false; + GumboNode *current_node; + while ((current_node = get_current_node(parser)) && !( + is_mathml_integration_point(current_node) || + is_html_integration_point(current_node) || + current_node->v.element.tag_namespace == GUMBO_NAMESPACE_HTML + )) { + if (!pop_current_node(parser)) break; } - assert(token->type == GUMBO_TOKEN_START_TAG); + parser->_parser_state->_reprocess_current_token = true; + return false; } if (token->type == GUMBO_TOKEN_START_TAG) { @@ -4647,7 +4643,7 @@ // we exclude the <html> tag as it causes crashes in the as-lxml // module, see https://github.com/kovidgoyal/html5-parser/issues/17 // I dont have the time to track down the root cause, probably something - // related to resuing the same string segments for the tag name and the + // related to reusing the same string segments for the tag name and the // special cloning/modification that happens to HTML tags. Since HTML tags // are treated specially anyway, there is no harm in excluding them. TAG(HTML)})) { diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/run_tests.py new/html5-parser-0.4.10/run_tests.py --- old/html5-parser-0.4.9/run_tests.py 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/run_tests.py 2021-09-22 09:00:47.000000000 +0200 @@ -10,6 +10,12 @@ import sys import unittest +if 'HTML5_PARSER_DLL_DIR' in os.environ: + sys.save_dll_dir = os.add_dll_directory(os.environ['HTML5_PARSER_DLL_DIR']) + print('Added DLL directory', sys.save_dll_dir, 'with contents:', + os.listdir(os.environ['HTML5_PARSER_DLL_DIR'])) + print('Current sys.path:', sys.path) + self_path = os.path.abspath(__file__) base = os.path.dirname(self_path) html5lib_tests_path = os.path.join(base, 'test', 'html5lib-tests') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/src/as-libxml.c new/html5-parser-0.4.10/src/as-libxml.c --- old/html5-parser-0.4.9/src/as-libxml.c 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/src/as-libxml.c 2021-09-22 09:00:47.000000000 +0200 @@ -215,7 +215,7 @@ if (UNLIKELY(elem->tag >= GUMBO_TAG_UNKNOWN)) { gumbo_tag_from_original_text(&(elem->original_tag)); - uint8_t tag_sz = MIN(sizeof(buf) - 1, elem->original_tag.length); + uint8_t tag_sz = (uint8_t)(MIN(sizeof(buf) - 1, elem->original_tag.length)); memcpy(buf, elem->original_tag.data, tag_sz); tag = buf; if (pd->maybe_xhtml) { @@ -223,7 +223,7 @@ nsprefix = check_for_namespace_prefix(&temp, &tag_sz); tag = temp; } - tag_sz = pd->sanitize_names ? sanitize_name((char*)tag) : strlen(tag); + tag_sz = (uint8_t)(pd->sanitize_names ? sanitize_name((char*)tag) : strlen(tag)); tag_name = xmlDictLookup(doc->dict, BAD_CAST tag, tag_sz); } else if (UNLIKELY(elem->tag_namespace == GUMBO_NAMESPACE_SVG)) { gumbo_tag_from_original_text(&(elem->original_tag)); diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/src/html5_parser/__init__.py new/html5-parser-0.4.10/src/html5_parser/__init__.py --- old/html5-parser-0.4.9/src/html5_parser/__init__.py 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/src/html5_parser/__init__.py 2021-09-22 09:00:47.000000000 +0200 @@ -115,7 +115,7 @@ return {'lxml.etree': 'lxml', 'etree': 'stdlib_etree'}.get(x, x) -NAMESPACE_SUPPORTING_BUILDERS = frozenset('lxml stdlib_etree dom'.split()) +NAMESPACE_SUPPORTING_BUILDERS = frozenset('lxml stdlib_etree dom lxml_html'.split()) def parse( @@ -129,7 +129,8 @@ return_root=True, line_number_attr=None, sanitize_names=True, - stack_size=16 * 1024 + stack_size=16 * 1024, + fragment_context=None, ): ''' Parse the specified :attr:`html` and return the parsed representation. @@ -145,7 +146,9 @@ :param treebuilder: The type of tree to return. Note that only the lxml treebuilder is fast, as all other treebuilders are implemented in python, not C. Supported values are: - * `lxml <http://lxml.de>`_ -- the default, and fastest + * `lxml <https://lxml.de>`_ -- the default, and fastest + * `lxml_html <https://lxml.de>`_ -- tree of lxml.html.HtmlElement, same speed as lxml + (new in *0.4.10*) * etree (the python stdlib :mod:`xml.etree.ElementTree`) * dom (the python stdlib :mod:`xml.dom.minidom`) * `soup <https://www.crummy.com/software/BeautifulSoup>`_ -- BeautifulSoup, @@ -161,7 +164,8 @@ suitable for XHTML. In particular handles self-closed CDATA elements. So a ``<title/>`` or ``<style/>`` in the HTML will not completely break parsing. Also preserves namespaced tags and attributes even for namespaces - not supported by HTML 5 (this works only with the ``lxml`` treebuilder). + not supported by HTML 5 (this works only with the ``lxml`` and ``lxml_html`` + treebuilders). Note that setting this also implicitly sets ``namespace_elements``. :param return_root: If True, return the root node of the document, otherwise @@ -181,6 +185,10 @@ default is sufficient to avoid memory allocations for all but the largest documents. + :param fragment_context: the tag name under which to parse the HTML when the html + is a fragment. Common choices are ``div`` or ``body``. To use SVG or MATHML tags + prefix the tag name with ``svg:`` or ``math:`` respectively. Note that currently + using a non-HTML fragment_context is not supported. New in *0.4.10*. ''' data = as_utf8(html or b'', transport_encoding, fallback_encoding) treebuilder = normalize_treebuilder(treebuilder) @@ -190,6 +198,15 @@ data, return_root=return_root, keep_doctype=keep_doctype, stack_size=stack_size) if treebuilder not in NAMESPACE_SUPPORTING_BUILDERS: namespace_elements = False + fragment_namespace = html_parser.GUMBO_NAMESPACE_HTML + if fragment_context: + fragment_context = fragment_context.lower() + if ':' in fragment_context: + ns, fragment_context = fragment_context.split(':', 1) + fragment_namespace = { + 'svg': html_parser.GUMBO_NAMESPACE_SVG, 'math': html_parser.GUMBO_NAMESPACE_MATHML, + 'html': html_parser.GUMBO_NAMESPACE_HTML + }[ns] capsule = html_parser.parse( data, @@ -198,10 +215,17 @@ maybe_xhtml=maybe_xhtml, line_number_attr=line_number_attr, sanitize_names=sanitize_names, - stack_size=stack_size) - - ans = etree.adopt_external_document(capsule) - if treebuilder == 'lxml': + stack_size=stack_size, + fragment_context=fragment_context, + fragment_namespace=fragment_namespace, + ) + + interpreter = None + if treebuilder == 'lxml_html': + from lxml.html import HTMLParser + interpreter = HTMLParser() + ans = etree.adopt_external_document(capsule, parser=interpreter) + if treebuilder in ('lxml', 'lxml_html'): return ans.getroot() if return_root else ans m = importlib.import_module('html5_parser.' + treebuilder) return m.adapt(ans, return_root=return_root) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/src/python-wrapper.c new/html5-parser-0.4.10/src/python-wrapper.c --- old/html5-parser-0.4.9/src/python-wrapper.c 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/src/python-wrapper.c 2021-09-22 09:00:47.000000000 +0200 @@ -15,7 +15,7 @@ #define MAJOR 0 #define MINOR 4 -#define PATCH 9 +#define PATCH 10 static char *NAME = "libxml2:xmlDoc"; static char *DESTRUCTOR = "destructor:xmlFreeDoc"; @@ -35,12 +35,12 @@ return doc; } -static inline libxml_doc* -parse_with_options(const char* buffer, size_t buffer_length, Options *opts) { +static libxml_doc* +parse_with_options(const char* buffer, size_t buffer_length, Options *opts, const GumboTag context, GumboNamespaceEnum context_namespace) { GumboOutput *output = NULL; libxml_doc* doc = NULL; Py_BEGIN_ALLOW_THREADS; - output = gumbo_parse_with_options(&(opts->gumbo_opts), buffer, buffer_length); + output = gumbo_parse_fragment(&(opts->gumbo_opts), buffer, buffer_length, context, context_namespace); Py_END_ALLOW_THREADS; if (output == NULL) PyErr_NoMemory(); else { @@ -76,18 +76,33 @@ Options opts = {0}; opts.stack_size = 16 * 1024; PyObject *kd = Py_True, *mx = Py_False, *ne = Py_False, *sn = Py_True; + char *fragment_context = NULL; Py_ssize_t fragment_context_sz = 0; opts.gumbo_opts = kGumboDefaultOptions; opts.gumbo_opts.max_errors = 0; // We discard errors since we are not reporting them anyway + GumboNamespaceEnum fragment_namespace = GUMBO_NAMESPACE_HTML; - static char *kwlist[] = {"data", "namespace_elements", "keep_doctype", "maybe_xhtml", "line_number_attr", "sanitize_names", "stack_size", NULL}; + static char *kwlist[] = {"data", "namespace_elements", "keep_doctype", "maybe_xhtml", "line_number_attr", "sanitize_names", "stack_size", "fragment_context", "fragment_namespace", NULL}; - if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|OOOzOI", kwlist, &buffer, &sz, &ne, &kd, &mx, &(opts.line_number_attr), &sn, &(opts.stack_size))) return NULL; + if (!PyArg_ParseTupleAndKeywords(args, kwds, "s#|OOOzOIz#i", kwlist, &buffer, &sz, &ne, &kd, &mx, &(opts.line_number_attr), &sn, &(opts.stack_size), &fragment_context, &fragment_context_sz, &fragment_namespace)) return NULL; opts.namespace_elements = PyObject_IsTrue(ne); opts.keep_doctype = PyObject_IsTrue(kd); opts.sanitize_names = PyObject_IsTrue(sn); opts.gumbo_opts.use_xhtml_rules = PyObject_IsTrue(mx); - - doc = parse_with_options(buffer, (size_t)sz, &opts); + GumboTag context = GUMBO_TAG_LAST; + if (fragment_context && fragment_context_sz > 0) { + context = gumbo_tagn_enum(fragment_context, fragment_context_sz); + if (context == GUMBO_TAG_UNKNOWN) { + PyErr_Format(PyExc_KeyError, "Unknown fragment_context tag name: %s", fragment_context); + return NULL; + } + } + if (fragment_namespace != GUMBO_NAMESPACE_HTML) { + // causes infinite loops in gumbo, enable the non html fragment context tests + // in html5lib_adapter.py to trigger + PyErr_SetString(PyExc_KeyError, "Fragment parsing with non-HTML namespaces is not supported"); + return NULL; + } + doc = parse_with_options(buffer, (size_t)sz, &opts, context, fragment_namespace); if (!doc) return NULL; return encapsulate(doc); } @@ -187,6 +202,9 @@ if (PyModule_AddIntMacro(m, MAJOR) != 0) INITERROR; if (PyModule_AddIntMacro(m, MINOR) != 0) INITERROR; if (PyModule_AddIntMacro(m, PATCH) != 0) INITERROR; + if (PyModule_AddIntMacro(m, GUMBO_NAMESPACE_HTML) != 0) INITERROR; + if (PyModule_AddIntMacro(m, GUMBO_NAMESPACE_SVG) != 0) INITERROR; + if (PyModule_AddIntMacro(m, GUMBO_NAMESPACE_MATHML) != 0) INITERROR; if (PyModule_AddIntConstant(m, "LIBXML_VERSION", get_libxml_version()) != 0) INITERROR; known_tag_names = PyTuple_New(GUMBO_TAG_UNKNOWN); if (known_tag_names == NULL) INITERROR; diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/test/basic.py new/html5-parser-0.4.10/test/basic.py --- old/html5-parser-0.4.9/test/basic.py 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/test/basic.py 2021-09-22 09:00:47.000000000 +0200 @@ -94,3 +94,12 @@ self.ae(root[1][0].sourceline, 4) self.ae(root[1][0][0].sourceline, 4) self.ae(root[1][0][0].get('ln'), '4') + + def test_lxml_html(self): + root = parse('<html><head><body><p><span>', treebuilder='lxml_html') + from lxml.html import HtmlElement + self.assertIsInstance(root, HtmlElement) + + def test_fragment(self): + root = parse('<span>a</span>', fragment_context='div') + self.ae(root[0].tag, 'span') diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/test/html5lib_adapter.py new/html5-parser-0.4.10/test/html5lib_adapter.py --- old/html5-parser-0.4.9/test/html5lib_adapter.py 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/test/html5lib_adapter.py 2021-09-22 09:00:47.000000000 +0200 @@ -65,10 +65,10 @@ return {k: n(v) for k, v in data.items()} -def serialize_construction_output(root): +def serialize_construction_output(root, fragment_context): tree = root.getroottree() lines = [] - if tree.docinfo.doctype: + if tree.docinfo.doctype and not fragment_context: di = tree.docinfo if di.public_id or di.system_url: d = '<!DOCTYPE {} "{}" "{}">'.format(di.root_name, di.public_id, di.system_url) @@ -97,12 +97,11 @@ level += 2 add(level, ns, name, '=', '"', val, '"') - def serialize_text(text, level): - level += 2 - add(level, '"', text, '"') + def serialize_text(text, level=0): + add((level + 2) if level else 1, '"', text, '"') def serialize_comment(node, level=1): - add(level, '<!-- ', node.text, ' -->') + add(level, '<!-- ', node.text or '', ' -->') def serialize_node(node, level=1): name = serialize_tag(node.tag, level) @@ -121,11 +120,20 @@ if child.tail: serialize_text(child.tail, level) - for c in root.itersiblings(preceding=True): - serialize_comment(c) - serialize_node(root) - for c in root.itersiblings(): - serialize_comment(c) + if fragment_context: + if root.text: + serialize_text(root.text) + for node in root.iterchildren(): + if isinstance(node, _Comment): + serialize_comment(node) + else: + serialize_node(node) + else: + for c in root.itersiblings(preceding=True): + serialize_comment(c) + serialize_node(root) + for c in root.itersiblings(): + serialize_comment(c) output = '\n'.join(lines) # gumbo does not fix single carriage returns generated by entities and it # does not lowercase unknown tags @@ -159,7 +167,7 @@ class ConstructionTests(BaseTest): @classmethod - def check_test(cls, inner_html, html, expected, errors, test_name): + def check_test(cls, fragment_context, html, expected, errors, test_name): if test_name == 'isindex' or html == '<!doctype html><isindex type="hidden">': return ( 'gumbo and html5lib differ on <isindex> parsing' @@ -176,17 +184,19 @@ for line in errors: if 'expected-doctype-name-but' in line or 'unknown-doctype' in line: return 'gumbo auto-corrects malformed doctypes' - if inner_html: - return 'TODO: Implement fragment parsing' + if fragment_context and ':' in fragment_context: + return 'Fragment parsing with non HTML contexts not supported' - def implementation(self, inner_html, html, expected, errors, test_name): - html = inner_html or html - bad = self.check_test(inner_html, html, expected, errors, test_name) + def implementation(self, fragment_context, html, expected, errors, test_name): + if fragment_context: + fragment_context = fragment_context.replace(' ', ':') + bad = self.check_test(fragment_context, html, expected, errors, test_name) if bad is not None: raise unittest.SkipTest(bad) - root = parse(html, namespace_elements=True, sanitize_names=False) - output = serialize_construction_output(root) + root = parse( + html, namespace_elements=True, sanitize_names=False, fragment_context=fragment_context) + output = serialize_construction_output(root, fragment_context=fragment_context) # html5lib doesn't yet support the template tag, but it appears in the # tests with the expectation that the template contents will be under the @@ -200,7 +210,7 @@ class EncodingTests(BaseTest): - def implementation(self, inner_html, html, expected, errors, test_name): + def implementation(self, fragment_context, html, expected, errors, test_name): if '<!-- Starts with UTF-8 BOM -->' in html: raw = b'\xef\xbb\xbf' + html[3:].encode('ascii') self.assertIs(check_bom(raw), codecs.BOM_UTF8) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html5-parser-0.4.9/win-ci.py new/html5-parser-0.4.10/win-ci.py --- old/html5-parser-0.4.9/win-ci.py 2019-11-03 04:13:38.000000000 +0100 +++ new/html5-parser-0.4.10/win-ci.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,255 +0,0 @@ -#!/usr/bin/env python3 -# vim:fileencoding=utf-8 -# License: Apache 2.0 Copyright: 2017, Kovid Goyal <kovid at kovidgoyal.net> - -from __future__ import print_function - -import errno -import glob -import io -import os -import pipes -import shlex -import shutil -import subprocess -import sys -import tarfile -import time - -ZLIB = "http://zlib.net/zlib-{}.tar.xz".format("1.2.11") -LIBXML2 = "ftp://xmlsoft.org/libxml2/libxml2-{}.tar.gz".format('2.9.4') -LIBXSLT = "ftp://xmlsoft.org/libxml2/libxslt-{}.tar.gz".format('1.1.28') -LXML = "https://pypi.python.org/packages/20/b3/9f245de14b7696e2d2a386c0b09032a2ff6625270761d6543827e667d8de/lxml-3.8.0.tar.gz" # noqa -SW = os.path.abspath('sw') -if 'PY' in os.environ and 'Platform' in os.environ: - PYTHON = os.path.expandvars('C:\\Python%PY%-%Platform%\\python.exe').replace('-x86', '') -else: - PYTHON = sys.executable -os.environ['SW'] = SW -os.environ['PYTHONPATH'] = os.path.expandvars('%SW%\\python\\Lib\\site-packages;%PYTHONPATH%') - - -def printf(*a, **k): - print(*a, **k) - sys.stdout.flush() - - -def walk(path='.'): - for dirpath, dirnames, filenames in os.walk(path): - for f in filenames: - yield os.path.join(dirpath, f) - - -def download_file(url): - for i in range(5): - try: - printf('Downloading', url) - try: - return subprocess.check_output(['curl.exe', '-fSL', url]) - except FileNotFoundError: - try: - from urllib.request import urlopen - except ImportError: - from urllib import urlopen - return urlopen(url).read() - except subprocess.CalledProcessError: - time.sleep(1) - raise SystemExit('Failed to download: {}'.format(url)) - - -def split(x): - x = x.replace('\\', '\\\\') - return shlex.split(x) - - -def run(*args, env=None, cwd=None): - if len(args) == 1 and isinstance(args[0], type('')): - cmd = split(args[0]) - else: - cmd = args - printf(' '.join(pipes.quote(x) for x in cmd)) - sys.stdout.flush() - if env: - printf('Using modified env:', env) - e = os.environ.copy() - e.update(env) - env = e - try: - p = subprocess.Popen(cmd, cwd=cwd, env=env) - except EnvironmentError as err: - if err.errno == errno.ENOENT: - raise SystemExit('Could not find the program: %s' % cmd[0]) - raise - if p.wait() != 0: - raise SystemExit(p.returncode) - - -def download_and_extract(url): - raw = io.BytesIO(download_file(url)) - with tarfile.open(fileobj=raw, mode='r:*') as f: - f.extractall() - for x in os.listdir('.'): - if os.path.isdir(x): - os.chdir(x) - return - - -def ensure_dir(path): - try: - os.makedirs(path) - except EnvironmentError as err: - if err.errno != errno.EEXIST: - raise - - -def replace_in_file(path, old, new, missing_ok=False): - if isinstance(old, type('')): - old = old.encode('utf-8') - if isinstance(new, type('')): - new = new.encode('utf-8') - with open(path, 'r+b') as f: - raw = f.read() - if isinstance(old, bytes): - nraw = raw.replace(old, new) - else: - nraw = old.sub(new, raw) - if raw == nraw and not missing_ok: - raise ValueError('Failed (pattern not found) to patch: ' + path) - f.seek(0), f.truncate() - f.write(nraw) - - -def copy_headers(pattern, destdir='include'): - dest = os.path.join(SW, destdir) - ensure_dir(dest) - files = glob.glob(pattern) - for f in files: - dst = os.path.join(dest, os.path.basename(f)) - if os.path.isdir(f): - shutil.copytree(f, dst) - else: - shutil.copy2(f, dst) - - -def install_binaries(pattern, destdir='lib', fname_map=os.path.basename): - dest = os.path.join(SW, destdir) - ensure_dir(dest) - files = glob.glob(pattern) - files.sort(key=len, reverse=True) - if not files: - raise ValueError('The pattern %s did not match any actual files' % pattern) - for f in files: - dst = os.path.join(dest, fname_map(f)) - shutil.copy(f, dst) - os.chmod(dst, 0o755) - if os.path.exists(f + '.manifest'): - shutil.copy(f + '.manifest', dst + '.manifest') - - -def install_tree(src, dest_parent='include', ignore=None): - dest_parent = os.path.join(SW, dest_parent) - dst = os.path.join(dest_parent, os.path.basename(src)) - if os.path.exists(dst): - shutil.rmtree(dst) - shutil.copytree(src, dst, symlinks=True, ignore=ignore) - return dst - - -def pure_python(): - run(PYTHON, '-m', 'pip', 'install', 'chardet', 'bs4', '--prefix', os.path.join(SW, 'python')) - run(PYTHON, '-c', 'import bs4; print(bs4)') - - -def zlib(): - run('nmake -f win32/Makefile.msc') - install_binaries('zlib1.dll*', 'bin') - install_binaries('zlib.lib'), install_binaries('zdll.*') - copy_headers('zconf.h'), copy_headers('zlib.h') - - -def libxml2(): - run( - *( - 'cscript.exe configure.js include={0}/include lib={0}/lib prefix={0} zlib=yes iconv=no'. - format(SW.replace(os.sep, '/')).split()), - cwd='win32') - run('nmake /f Makefile.msvc', cwd='win32') - install_tree('include/libxml', 'include/libxml2') - for f in walk('.'): - if f.endswith('.dll'): - install_binaries(f, 'bin') - elif f.endswith('.lib'): - install_binaries(f) - - -def libxslt(): - run( - *( - 'cscript.exe configure.js include={0}/include include={0}/include/libxml2 lib={0}/lib ' - 'prefix={0} zlib=yes iconv=no'.format(SW.replace(os.sep, '/')).split()), - cwd='win32') - replace_in_file('libxslt/win32config.h', '#define snprintf _snprintf', '') - for f in walk('.'): - if os.path.basename(f).startswith('Makefile'): - replace_in_file(f, '/OPT:NOWIN98', '', missing_ok=True) - run('nmake /f Makefile.msvc', cwd='win32') - install_tree('libxslt', 'include') - install_tree('libexslt', 'include') - for f in walk('.'): - if f.endswith('.dll'): - install_binaries(f, 'bin') - elif f.endswith('.lib'): - install_binaries(f) - - -def lxml(): - replace_in_file('setupinfo.py', ", 'iconv'", '') - run( - PYTHON, - *( - 'setup.py build_ext -I {0}/include;{0}/include/libxml2 -L {0}/lib'.format( - SW.replace(os.sep, '/')).split())) - run(PYTHON, 'setup.py', 'install', '--prefix', os.path.join(SW, 'python')) - - -def install_deps(): - print(PYTHON) - for x in 'build lib bin include python/Lib/site-packages'.split(): - ensure_dir(os.path.join(SW, x)) - os.chdir(os.path.join(SW, 'build')) - base = os.getcwd() - pure_python() - for name in 'zlib libxml2 libxslt lxml'.split(): - os.chdir(base) - if os.path.exists(name): - continue - os.mkdir(name), os.chdir(name) - try: - download_and_extract(globals()[name.upper()]) - globals()[name]() - except: - os.chdir(base) - shutil.rmtree(name) - raise - - -def build(): - p = os.environ['PATH'] - p = os.path.join(SW, 'bin') + os.pathsep + p - env = dict( - LIBXML_INCLUDE_DIRS=r'{0}\include;{0}\include\libxml2'.format(SW), - LIBXML_LIB_DIRS=r'{0}\lib'.format(SW), - PATH=p - ) - run(PYTHON, 'setup.py', 'test', env=env) - - -def main(): - if sys.argv[-1] == 'install_deps': - install_deps() - else: - build() - - -if __name__ == '__main__': - main()