Script 'mail_helper' called by obssrc

Hello community,

here is the log from the commit of package python-Scrapy for openSUSE:Factory checked in at 2024-01-10 21:52:52
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-Scrapy (Old)
 and      /work/SRC/openSUSE:Factory/.python-Scrapy.new.21961 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-Scrapy" Wed Jan 10 21:52:52 2024 rev:18 rq:1137882 version:2.11.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-Scrapy/python-Scrapy.changes 2022-11-09 12:57:09.704255337 +0100 +++ /work/SRC/openSUSE:Factory/.python-Scrapy.new.21961/python-Scrapy.changes 2024-01-10 21:53:01.069755961 +0100 @@ -1,0 +2,24 @@ +Wed Jan 10 07:50:52 UTC 2024 - Daniel Garcia <daniel.gar...@suse.com> + +- Add patch twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064 +- Update to 2.11.0: + - Spiders can now modify settings in their from_crawler methods, + e.g. based on spider arguments. + - Periodic logging of stats. + - Bug fixes. +- 2.10.0: + - Added Python 3.12 support, dropped Python 3.7 support. + - The new add-ons framework simplifies configuring 3rd-party + components that support it. + - Exceptions to retry can now be configured. + - Many fixes and improvements for feed exports. +- 2.9.0: + - Per-domain download settings. + - Compatibility with new cryptography and new parsel. + - JMESPath selectors from the new parsel. + - Bug fixes. +- 2.8.0: + - This is a maintenance release, with minor features, bug fixes, and + cleanups. + +------------------------------------------------------------------- Old: ---- Scrapy-2.7.1.tar.gz New: ---- Scrapy-2.11.0.tar.gz twisted-23.8.0-compat.patch BETA DEBUG BEGIN: New: - Add patch twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064 - Update to 2.11.0: BETA DEBUG END: ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-Scrapy.spec ++++++ --- /var/tmp/diff_new_pack.ScDHoH/_old 2024-01-10 21:53:02.161795618 +0100 +++ /var/tmp/diff_new_pack.ScDHoH/_new 2024-01-10 21:53:02.161795618 +0100 @@ -1,7 +1,7 @@ # # spec file for package python-Scrapy # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2024 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,62 +16,63 @@ # -%{?!python_module:%define python_module() python3-%{**}} -%define skip_python2 1 Name: python-Scrapy -Version: 2.7.1 +Version: 2.11.0 Release: 0 Summary: A high-level Python Screen Scraping framework License: BSD-3-Clause Group: Development/Languages/Python URL: https://scrapy.org Source: https://files.pythonhosted.org/packages/source/S/Scrapy/Scrapy-%{version}.tar.gz +# PATCH-FIX-UPSTREAM twisted-23.8.0-compat.patch gh#scrapy/scrapy#6064 +Patch1: twisted-23.8.0-compat.patch BuildRequires: %{python_module Pillow} BuildRequires: %{python_module Protego >= 0.1.15} BuildRequires: %{python_module PyDispatcher >= 2.0.5} -BuildRequires: %{python_module Twisted >= 17.9.0} -BuildRequires: %{python_module botocore} -BuildRequires: %{python_module cryptography >= 2.0} +BuildRequires: %{python_module Twisted >= 18.9.0} +BuildRequires: %{python_module attrs} +BuildRequires: %{python_module botocore >= 1.4.87} +BuildRequires: %{python_module cryptography >= 36.0.0} BuildRequires: %{python_module cssselect >= 0.9.1} BuildRequires: %{python_module dbm} BuildRequires: %{python_module itemadapter >= 0.1.0} BuildRequires: %{python_module itemloaders >= 1.0.1} -BuildRequires: %{python_module jmespath} -BuildRequires: %{python_module lxml >= 3.5.0} +BuildRequires: %{python_module lxml >= 4.4.1} BuildRequires: %{python_module parsel >= 1.5.0} -BuildRequires: %{python_module pyOpenSSL >= 16.2.0} +BuildRequires: %{python_module pexpect >= 4.8.1} +BuildRequires: %{python_module pyOpenSSL >= 21.0.0} BuildRequires: %{python_module 
pyftpdlib} BuildRequires: %{python_module pytest-xdist} BuildRequires: %{python_module pytest} BuildRequires: %{python_module queuelib >= 1.4.2} -BuildRequires: %{python_module service_identity >= 16.0.0} +BuildRequires: %{python_module service_identity >= 18.1.0} BuildRequires: %{python_module setuptools} BuildRequires: %{python_module sybil} -BuildRequires: %{python_module testfixtures >= 6.0.0} +BuildRequires: %{python_module testfixtures} BuildRequires: %{python_module tldextract} BuildRequires: %{python_module uvloop} BuildRequires: %{python_module w3lib >= 1.17.0} -BuildRequires: %{python_module zope.interface >= 4.1.3} +BuildRequires: %{python_module zope.interface >= 5.1.0} BuildRequires: fdupes BuildRequires: python-rpm-macros BuildRequires: python3-Sphinx BuildRequires: (python3-dataclasses if python3-base < 3.7) Requires: python-Protego >= 0.1.15 Requires: python-PyDispatcher >= 2.0.5 -Requires: python-Twisted >= 17.9.0 -Requires: python-cryptography >= 2.0 +Requires: python-Twisted >= 18.9.0 +Requires: python-cryptography >= 36.0.0 Requires: python-cssselect >= 0.9.1 Requires: python-itemadapter >= 0.1.0 Requires: python-itemloaders >= 1.0.1 -Requires: python-lxml >= 3.5.0 +Requires: python-lxml >= 4.4.1 Requires: python-parsel >= 1.5.0 -Requires: python-pyOpenSSL >= 16.2.0 +Requires: python-pyOpenSSL >= 21.0.0 Requires: python-queuelib >= 1.4.2 -Requires: python-service_identity >= 16.0.0 +Requires: python-service_identity >= 18.1.0 Requires: python-setuptools Requires: python-tldextract Requires: python-w3lib >= 1.17.2 -Requires: python-zope.interface >= 4.1.3 +Requires: python-zope.interface >= 5.1.0 Requires(post): update-alternatives Requires(postun):update-alternatives BuildArch: noarch @@ -90,8 +91,7 @@ Provides documentation for %{name}. 
 
 %prep
-%setup -n Scrapy-%{version}
-%autopatch -p1
+%autosetup -p1 -n Scrapy-%{version}
 
 sed -i -e 's:= python:= python3:g' docs/Makefile
 
@@ -111,7 +111,9 @@
 skiplist="test_pformat"
 # no online connection to toscrapy.com
 skiplist="$skiplist or CheckCommandTest"
-%{pytest \
+# Flaky test gh#scrapy/scrapy#5703
+skiplist="$skiplist or test_start_requests_laziness"
+%{pytest -x \
  -k "not (${skiplist})" \
  -W ignore::DeprecationWarning \
  tests}

++++++ Scrapy-2.7.1.tar.gz -> Scrapy-2.11.0.tar.gz ++++++
++++ 74862 lines of diff (skipped)

++++++ twisted-23.8.0-compat.patch ++++++
Index: Scrapy-2.11.0/scrapy/crawler.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/crawler.py
+++ Scrapy-2.11.0/scrapy/crawler.py
@@ -404,8 +404,8 @@ class CrawlerProcess(CrawlerRunner):
         :param bool stop_after_crawl: stop or not the reactor when all
             crawlers have finished
 
-        :param bool install_signal_handlers: whether to install the shutdown
-            handlers (default: True)
+        :param bool install_signal_handlers: whether to install the OS signal
+            handlers from Twisted and Scrapy (default: True)
         """
         from twisted.internet import reactor
 
@@ -416,15 +416,17 @@ class CrawlerProcess(CrawlerRunner):
                 return
             d.addBoth(self._stop_reactor)
 
-        if install_signal_handlers:
-            install_shutdown_handlers(self._signal_shutdown)
         resolver_class = load_object(self.settings["DNS_RESOLVER"])
         resolver = create_instance(resolver_class, self.settings, self, reactor=reactor)
         resolver.install_on_reactor()
         tp = reactor.getThreadPool()
         tp.adjustPoolsize(maxthreads=self.settings.getint("REACTOR_THREADPOOL_MAXSIZE"))
         reactor.addSystemEventTrigger("before", "shutdown", self.stop)
-        reactor.run(installSignalHandlers=False)  # blocking call
+        if install_signal_handlers:
+            reactor.addSystemEventTrigger(
+                "after", "startup", install_shutdown_handlers, self._signal_shutdown
+            )
+        reactor.run(installSignalHandlers=install_signal_handlers)  # blocking call
 
     def _graceful_stop_reactor(self) -> Deferred:
         d = self.stop()
Index: Scrapy-2.11.0/scrapy/utils/ossignal.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/utils/ossignal.py
+++ Scrapy-2.11.0/scrapy/utils/ossignal.py
@@ -19,13 +19,10 @@ def install_shutdown_handlers(
     function: SignalHandlerT, override_sigint: bool = True
 ) -> None:
     """Install the given function as a signal handler for all common shutdown
-    signals (such as SIGINT, SIGTERM, etc). If override_sigint is ``False`` the
-    SIGINT handler won't be install if there is already a handler in place
-    (e.g. Pdb)
+    signals (such as SIGINT, SIGTERM, etc). If ``override_sigint`` is ``False`` the
+    SIGINT handler won't be installed if there is already a handler in place
+    (e.g. Pdb)
     """
-    from twisted.internet import reactor
-
-    reactor._handleSignals()
     signal.signal(signal.SIGTERM, function)
     if signal.getsignal(signal.SIGINT) == signal.default_int_handler or override_sigint:
         signal.signal(signal.SIGINT, function)
Index: Scrapy-2.11.0/scrapy/utils/testproc.py
===================================================================
--- Scrapy-2.11.0.orig/scrapy/utils/testproc.py
+++ Scrapy-2.11.0/scrapy/utils/testproc.py
@@ -2,7 +2,7 @@ from __future__ import annotations
 
 import os
 import sys
-from typing import Iterable, Optional, Tuple, cast
+from typing import Iterable, List, Optional, Tuple, cast
 
 from twisted.internet.defer import Deferred
 from twisted.internet.error import ProcessTerminated
@@ -26,14 +26,15 @@ class ProcessTest:
         env = os.environ.copy()
         if settings is not None:
             env["SCRAPY_SETTINGS_MODULE"] = settings
+        assert self.command
         cmd = self.prefix + [self.command] + list(args)
         pp = TestProcessProtocol()
-        pp.deferred.addBoth(self._process_finished, cmd, check_code)
+        pp.deferred.addCallback(self._process_finished, cmd, check_code)
         reactor.spawnProcess(pp, cmd[0], cmd, env=env, path=self.cwd)
         return pp.deferred
 
     def _process_finished(
-        self, pp: TestProcessProtocol, cmd: str, check_code: bool
+        self, pp: TestProcessProtocol, cmd: List[str], check_code: bool
     ) -> Tuple[int, bytes, bytes]:
         if pp.exitcode and check_code:
             msg = f"process {cmd} exit with code {pp.exitcode}"
Index: Scrapy-2.11.0/setup.py
===================================================================
--- Scrapy-2.11.0.orig/setup.py
+++ Scrapy-2.11.0/setup.py
@@ -6,8 +6,7 @@ version = (Path(__file__).parent / "scra
 
 
 install_requires = [
-    # 23.8.0 incompatibility: https://github.com/scrapy/scrapy/issues/6024
-    "Twisted>=18.9.0,<23.8.0",
+    "Twisted>=18.9.0",
     "cryptography>=36.0.0",
     "cssselect>=0.9.1",
     "itemloaders>=1.0.1",
Index: Scrapy-2.11.0/tests/CrawlerProcess/sleeping.py
===================================================================
--- /dev/null
+++ Scrapy-2.11.0/tests/CrawlerProcess/sleeping.py
@@ -0,0 +1,24 @@
+from twisted.internet.defer import Deferred
+
+import scrapy
+from scrapy.crawler import CrawlerProcess
+from scrapy.utils.defer import maybe_deferred_to_future
+
+
+class SleepingSpider(scrapy.Spider):
+    name = "sleeping"
+
+    start_urls = ["data:,;"]
+
+    async def parse(self, response):
+        from twisted.internet import reactor
+
+        d = Deferred()
+        reactor.callLater(3, d.callback, None)
+        await maybe_deferred_to_future(d)
+
+
+process = CrawlerProcess(settings={})
+
+process.crawl(SleepingSpider)
+process.start()
Index: Scrapy-2.11.0/tests/requirements.txt
===================================================================
--- Scrapy-2.11.0.orig/tests/requirements.txt
+++ Scrapy-2.11.0/tests/requirements.txt
@@ -1,5 +1,6 @@
 # Tests requirements
 attrs
+pexpect >= 4.8.0
 # https://github.com/giampaolo/pyftpdlib/issues/560
 pyftpdlib; python_version < "3.12"
 pytest
Index: Scrapy-2.11.0/tests/test_command_shell.py
===================================================================
--- Scrapy-2.11.0.orig/tests/test_command_shell.py
+++ Scrapy-2.11.0/tests/test_command_shell.py
@@ -1,11 +1,15 @@
+import sys
+from io import BytesIO
 from pathlib import Path
 
+from pexpect.popen_spawn import PopenSpawn
 from twisted.internet import defer
 from twisted.trial import unittest
 
 from scrapy.utils.testproc import ProcessTest
 from scrapy.utils.testsite import SiteTest
 from tests import NON_EXISTING_RESOLVABLE, tests_datadir
+from tests.mockserver import MockServer
 
 
 class ShellTest(ProcessTest, SiteTest, unittest.TestCase):
@@ -133,3 +137,25 @@ class ShellTest(ProcessTest, SiteTest, u
         args = ["-c", code, "--set", f"TWISTED_REACTOR={reactor_path}"]
         _, _, err = yield self.execute(args, check_code=True)
         self.assertNotIn(b"RuntimeError: There is no current event loop in thread", err)
+
+
+class InteractiveShellTest(unittest.TestCase):
+    def test_fetch(self):
+        args = (
+            sys.executable,
+            "-m",
+            "scrapy.cmdline",
+            "shell",
+        )
+        logfile = BytesIO()
+        p = PopenSpawn(args, timeout=5)
+        p.logfile_read = logfile
+        p.expect_exact("Available Scrapy objects")
+        with MockServer() as mockserver:
+            p.sendline(f"fetch('{mockserver.url('/')}')")
+            p.sendline("type(response)")
+            p.expect_exact("HtmlResponse")
+        p.sendeof()
+        p.wait()
+        logfile.seek(0)
+        self.assertNotIn("Traceback", logfile.read().decode())
Index: Scrapy-2.11.0/tests/test_crawler.py
===================================================================
--- Scrapy-2.11.0.orig/tests/test_crawler.py
+++ Scrapy-2.11.0/tests/test_crawler.py
@@ -1,13 +1,16 @@
 import logging
 import os
 import platform
+import signal
 import subprocess
 import sys
 import warnings
 from pathlib import Path
+from typing import List
 
 import pytest
 from packaging.version import parse as parse_version
+from pexpect.popen_spawn import PopenSpawn
 from pytest import mark, raises
 from twisted.internet import defer
 from twisted.trial import unittest
@@ -289,9 +292,12 @@ class ScriptRunnerMixin:
     script_dir: Path
     cwd = os.getcwd()
 
-    def run_script(self, script_name: str, *script_args):
+    def get_script_args(self, script_name: str, *script_args: str) -> List[str]:
         script_path = self.script_dir / script_name
-        args = [sys.executable, str(script_path)] + list(script_args)
+        return [sys.executable, str(script_path)] + list(script_args)
+
+    def run_script(self, script_name: str, *script_args: str) -> str:
+        args = self.get_script_args(script_name, *script_args)
         p = subprocess.Popen(
             args,
             env=get_mockserver_env(),
@@ -517,6 +523,29 @@ class CrawlerProcessSubprocess(ScriptRun
         self.assertIn("Spider closed (finished)", log)
         self.assertIn("The value of FOO is 42", log)
 
+    def test_shutdown_graceful(self):
+        sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+        args = self.get_script_args("sleeping.py")
+        p = PopenSpawn(args, timeout=5)
+        p.expect_exact("Spider opened")
+        p.expect_exact("Crawled (200)")
+        p.kill(sig)
+        p.expect_exact("shutting down gracefully")
+        p.expect_exact("Spider closed (shutdown)")
+        p.wait()
+
+    def test_shutdown_forced(self):
+        sig = signal.SIGINT if sys.platform != "win32" else signal.SIGBREAK
+        args = self.get_script_args("sleeping.py")
+        p = PopenSpawn(args, timeout=5)
+        p.expect_exact("Spider opened")
+        p.expect_exact("Crawled (200)")
+        p.kill(sig)
+        p.expect_exact("shutting down gracefully")
+        p.kill(sig)
+        p.expect_exact("forcing unclean shutdown")
+        p.wait()
+
 
 class CrawlerRunnerSubprocess(ScriptRunnerMixin, unittest.TestCase):
     script_dir = Path(__file__).parent.resolve() / "CrawlerRunner"
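
The core of twisted-23.8.0-compat.patch is the change to CrawlerProcess.start(): instead of installing Scrapy's shutdown handlers up front (and relying on the removed private reactor._handleSignals() call), the handlers are now registered through an "after startup" reactor event trigger and reactor.run() installs Twisted's own handlers itself. The new tests drive this end to end with pexpect: one SIGINT logs "shutting down gracefully", a second one forces an unclean shutdown. The following standalone sketch condenses the test_shutdown_graceful interaction; it assumes it is run from an unpacked Scrapy source tree, and the script path and timeout are illustrative, not values taken from the package.

    # Minimal sketch of the graceful-shutdown behaviour exercised by the new
    # test_shutdown_graceful test. Assumptions: run from an unpacked Scrapy
    # source tree; the relative script path and the timeout are illustrative.
    import signal
    import sys

    from pexpect.popen_spawn import PopenSpawn

    args = [sys.executable, "tests/CrawlerProcess/sleeping.py"]
    p = PopenSpawn(args, timeout=15)
    p.expect_exact("Spider opened")
    p.kill(signal.SIGINT)                       # first signal: graceful shutdown
    p.expect_exact("shutting down gracefully")
    p.expect_exact("Spider closed (shutdown)")  # a second SIGINT would force an unclean shutdown
    p.wait()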
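
Among the 2.11.0 changelog entries above, the note that spiders can modify settings in their from_crawler methods is the most visible change for spider authors. A minimal sketch of what that enables follows; the spider, its "delay" argument and the choice of DOWNLOAD_DELAY are illustrative assumptions, with only Settings.set() and the "spider" priority taken from Scrapy's documented settings API.

    import scrapy


    class ExampleSpider(scrapy.Spider):
        """Hypothetical spider illustrating the 2.11.0 from_crawler change."""

        name = "example"
        start_urls = ["https://example.com"]

        @classmethod
        def from_crawler(cls, crawler, *args, **kwargs):
            spider = super().from_crawler(crawler, *args, **kwargs)
            # Since Scrapy 2.11 the crawler settings can still be modified at
            # this point, e.g. based on a spider argument passed with -a.
            delay = getattr(spider, "delay", None)
            if delay is not None:
                crawler.settings.set("DOWNLOAD_DELAY", float(delay), priority="spider")
            return spider

        def parse(self, response):
            yield {"url": response.url, "title": response.css("title::text").get()}

Invoked as `scrapy crawl example -a delay=5`, such a spider would slow itself down without touching the project settings.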