Script 'mail_helper' called by obssrc Hello community, here is the log from the commit of package python-sqlparse for openSUSE:Factory checked in at 2023-05-26 20:15:28 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-sqlparse (Old) and /work/SRC/openSUSE:Factory/.python-sqlparse.new.1533 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-sqlparse" Fri May 26 20:15:28 2023 rev:15 rq:1089053 version:0.4.4 Changes: -------- --- /work/SRC/openSUSE:Factory/python-sqlparse/python-sqlparse.changes 2022-10-04 20:37:15.852876933 +0200 +++ /work/SRC/openSUSE:Factory/.python-sqlparse.new.1533/python-sqlparse.changes 2023-05-26 20:15:39.460328536 +0200 @@ -1,0 +2,20 @@ +Thu May 25 16:21:05 UTC 2023 - Matej Cepl <mc...@suse.com> + +- Update to 0.4.4: + * IMPORTANT: This release fixes a security vulnerability in + the parser where a regular expression vulnerable to ReDOS + (Regular Expression Denial of Service) was used. See the + security advisory for details (CVE-2023-30608, bsc#1210617, + https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2) + The vulnerability was discovered by @erik-krogh from GitHub + Security Lab (GHSL). Thanks for reporting! + * Revert a change from 0.4.0 that changed IN to be a comparison + (issue694). The primary expectation is that IN is treated as + a keyword and not as a comparison operator. That also follows + the definition of reserved keywords for the major SQL syntax + definitions. + * Fix regular expressions for string parsing. + * sqlparse now uses pyproject.toml instead of setup.cfg + (issue685). 
+ +------------------------------------------------------------------- Old: ---- sqlparse-0.4.3.tar.gz New: ---- sqlparse-0.4.4.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-sqlparse.spec ++++++ --- /var/tmp/diff_new_pack.pZdfYc/_old 2023-05-26 20:15:40.156332685 +0200 +++ /var/tmp/diff_new_pack.pZdfYc/_new 2023-05-26 20:15:40.208332995 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-sqlparse # -# Copyright (c) 2022 SUSE LLC +# Copyright (c) 2023 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -16,24 +16,24 @@ # -%{?!python_module:%define python_module() python-%{**} python3-%{**}} %define skip_python2 1 Name: python-sqlparse -Version: 0.4.3 +Version: 0.4.4 Release: 0 Summary: Non-validating SQL parser License: BSD-3-Clause Group: Development/Languages/Python URL: https://github.com/andialbrecht/sqlparse Source: https://files.pythonhosted.org/packages/source/s/sqlparse/sqlparse-%{version}.tar.gz +BuildRequires: %{python_module flit-core} +BuildRequires: %{python_module pip} BuildRequires: %{python_module pytest} -BuildRequires: %{python_module setuptools} +BuildRequires: %{python_module wheel} BuildRequires: fdupes BuildRequires: python-rpm-macros Requires(post): update-alternatives Requires(postun):update-alternatives BuildArch: noarch - %python_subpackages %description @@ -43,12 +43,13 @@ %prep %setup -q -n sqlparse-%{version} sed -i -e '1{\,^#!%{_bindir}/env python,d}' sqlparse/__main__.py sqlparse/cli.py +chmod -x sqlparse/cli.py %build -%python_build +%pyproject_wheel %install -%python_install +%pyproject_install %python_clone -a %{buildroot}%{_bindir}/sqlformat %python_expand %fdupes %{buildroot}%{$python_sitelib} @@ -65,6 +66,7 @@ %doc AUTHORS README.rst %license LICENSE %python_alternative %{_bindir}/sqlformat -%{python_sitelib}/* 
+%{python_sitelib}/sqlparse +%{python_sitelib}/sqlparse-%{version}*-info %changelog ++++++ sqlparse-0.4.3.tar.gz -> sqlparse-0.4.4.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/CHANGELOG new/sqlparse-0.4.4/CHANGELOG --- old/sqlparse-0.4.3/CHANGELOG 2022-09-23 20:30:49.000000000 +0200 +++ new/sqlparse-0.4.4/CHANGELOG 2023-04-18 10:27:22.670305700 +0200 @@ -1,3 +1,28 @@ +Release 0.4.4 (Apr 18, 2023) +---------------------------- + +Notable Changes + +* IMPORTANT: This release fixes a security vulnerability in the + parser where a regular expression vulnerable to ReDOS (Regular + Expression Denial of Service) was used. See the security advisory + for details: https://github.com/andialbrecht/sqlparse/security/advisories/GHSA-rrm6-wvj7-cwh2 + The vulnerability was discovered by @erik-krogh from GitHub + Security Lab (GHSL). Thanks for reporting! + +Bug Fixes + +* Revert a change from 0.4.0 that changed IN to be a comparison (issue694). + The primary expectation is that IN is treated as a keyword and not as a + comparison operator. That also follows the definition of reserved keywords + for the major SQL syntax definitions. +* Fix regular expressions for string parsing. + +Other + +* sqlparse now uses pyproject.toml instead of setup.cfg (issue685). 
+ + Release 0.4.3 (Sep 23, 2022) ---------------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/MANIFEST.in new/sqlparse-0.4.4/MANIFEST.in --- old/sqlparse-0.4.3/MANIFEST.in 2019-10-09 10:03:19.000000000 +0200 +++ new/sqlparse-0.4.4/MANIFEST.in 1970-01-01 01:00:00.000000000 +0100 @@ -1,11 +0,0 @@ -recursive-include docs source/* -include docs/sqlformat.1 -include docs/Makefile -recursive-include tests *.py *.sql -include LICENSE -include TODO -include AUTHORS -include CHANGELOG -include Makefile -include setup.cfg -include tox.ini diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/Makefile new/sqlparse-0.4.4/Makefile --- old/sqlparse-0.4.3/Makefile 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/Makefile 2022-12-30 16:04:41.268404000 +0100 @@ -22,5 +22,5 @@ release: @rm -rf dist/ - python setup.py sdist bdist_wheel + python -m build twine upload --sign --identity E0B84F81 dist/* diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/PKG-INFO new/sqlparse-0.4.4/PKG-INFO --- old/sqlparse-0.4.3/PKG-INFO 2022-09-23 20:40:44.459821500 +0200 +++ new/sqlparse-0.4.4/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 @@ -1,15 +1,10 @@ Metadata-Version: 2.1 Name: sqlparse -Version: 0.4.3 +Version: 0.4.4 Summary: A non-validating SQL parser. 
-Home-page: https://github.com/andialbrecht/sqlparse -Author: Andi Albrecht -Author-email: albrecht.a...@gmail.com -License: BSD-3-Clause -Project-URL: Documentation, https://sqlparse.readthedocs.io/ -Project-URL: Release Notes, https://sqlparse.readthedocs.io/en/latest/changes/ -Project-URL: Source, https://github.com/andialbrecht/sqlparse -Project-URL: Tracker, https://github.com/andialbrecht/sqlparse/issues +Author-email: Andi Albrecht <albrecht.a...@gmail.com> +Requires-Python: >=3.5 +Description-Content-Type: text/x-rst Classifier: Development Status :: 5 - Production/Stable Classifier: Intended Audience :: Developers Classifier: License :: OSI Approved :: BSD License @@ -27,9 +22,19 @@ Classifier: Programming Language :: Python :: Implementation :: PyPy Classifier: Topic :: Database Classifier: Topic :: Software Development -Requires-Python: >=3.5 -License-File: LICENSE -License-File: AUTHORS +Requires-Dist: flake8 ; extra == "dev" +Requires-Dist: build ; extra == "dev" +Requires-Dist: sphinx ; extra == "doc" +Requires-Dist: pytest ; extra == "test" +Requires-Dist: pytest-cov ; extra == "test" +Project-URL: Documentation, https://sqlparse.readthedocs.io/ +Project-URL: Home, https://github.com/andialbrecht/sqlparse +Project-URL: Release Notes, https://sqlparse.readthedocs.io/en/latest/changes/ +Project-URL: Source, https://github.com/andialbrecht/sqlparse +Project-URL: Tracker, https://github.com/andialbrecht/sqlparse/issues +Provides-Extra: dev +Provides-Extra: doc +Provides-Extra: test python-sqlparse - Parse SQL statements ====================================== @@ -109,3 +114,4 @@ .. _docs: https://sqlparse.readthedocs.io/en/latest/?badge=latest .. |packageversion| image:: https://img.shields.io/pypi/v/sqlparse?color=%2334D058&label=pypi%20package .. 
_packageversion: https://pypi.org/project/sqlparse + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/docs/source/extending.rst new/sqlparse-0.4.4/docs/source/extending.rst --- old/sqlparse-0.4.3/docs/source/extending.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/sqlparse-0.4.4/docs/source/extending.rst 2023-03-20 08:41:41.863353700 +0100 @@ -0,0 +1,76 @@ +Extending :mod:`sqlparse` +========================= + +.. module:: sqlparse + :synopsis: Extending parsing capability of sqlparse. + +The :mod:`sqlparse` module uses a sql grammar that was tuned through usage and numerous +PR to fit a broad range of SQL syntaxes, but it cannot cater to every given case since +some SQL dialects have adopted conflicting meanings of certain keywords. Sqlparse +therefore exposes a mechanism to configure the fundamental keywords and regular +expressions that parse the language as described below. + +If you find an adaptation that works for your specific use-case. Please consider +contributing it back to the community by opening a PR on +`GitHub <https://github.com/andialbrecht/sqlparse>`_. + +Configuring the Lexer +--------------------- + +The lexer is a singleton class that breaks down the stream of characters into language +tokens. It does this by using a sequence of regular expressions and keywords that are +listed in the file ``sqlparse.keywords``. Instead of applying these fixed grammar +definitions directly, the lexer is default initialized in its method called +``default_initialization()``. As an api user, you can adapt the Lexer configuration by +applying your own configuration logic. To do so, start out by clearing previous +configurations with ``.clear()``, then apply the SQL list with +``.set_SQL_REGEX(SQL_REGEX)``, and apply keyword lists with ``.add_keywords(KEYWORDS)``. 
+ +You can do so by re-using the expressions in ``sqlparse.keywords`` (see example below), +leaving parts out, or by making up your own master list. + +See the expected types of the arguments by inspecting their structure in +``sqlparse.keywords``. +(For compatibility with python 3.4, this library does not use type-hints.) + +The following example adds support for the expression ``ZORDER BY``, and adds ``BAR`` as +a keyword to the lexer: + +.. code-block:: python + + import re + + import sqlparse + from sqlparse import keywords + from sqlparse.lexer import Lexer + + # get the lexer singleton object to configure it + lex = Lexer.get_default_instance() + + # Clear the default configurations. + # After this call, reg-exps and keyword dictionaries need to be loaded + # to make the lexer functional again. + lex.clear() + + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) + + # slice the default SQL_REGEX to inject the custom object + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) + + # add the default keyword dictionaries + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + + # add a custom keyword dictionary + lex.add_keywords({'BAR', sqlparse.tokens.Keyword}) + + # no configuration is passed here. The lexer is used as a singleton. 
+ sqlparse.parse("select * from foo zorder by bar;") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/docs/source/index.rst new/sqlparse-0.4.4/docs/source/index.rst --- old/sqlparse-0.4.3/docs/source/index.rst 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/docs/source/index.rst 2023-03-20 08:41:41.866786000 +0100 @@ -20,6 +20,7 @@ api analyzing ui + extending changes license indices diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/pyproject.toml new/sqlparse-0.4.4/pyproject.toml --- old/sqlparse-0.4.3/pyproject.toml 1970-01-01 01:00:00.000000000 +0100 +++ new/sqlparse-0.4.4/pyproject.toml 2022-12-30 16:04:41.268766600 +0100 @@ -0,0 +1,70 @@ +[build-system] +requires = ["flit_core >=3.2,<4"] +build-backend = "flit_core.buildapi" + +[project] +name = "sqlparse" +description = "A non-validating SQL parser." +authors = [{name = "Andi Albrecht", email = "albrecht.a...@gmail.com"}] +readme = "README.rst" +dynamic = ["version"] +classifiers = [ + "Development Status :: 5 - Production/Stable", + "Intended Audience :: Developers", + "License :: OSI Approved :: BSD License", + "Operating System :: OS Independent", + "Programming Language :: Python", + "Programming Language :: Python :: 3", + "Programming Language :: Python :: 3 :: Only", + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: Implementation :: CPython", + "Programming Language :: Python :: Implementation :: PyPy", + "Topic :: Database", + "Topic :: Software Development", +] +requires-python = ">=3.5" + +[project.urls] +Home = "https://github.com/andialbrecht/sqlparse" +Documentation = "https://sqlparse.readthedocs.io/" +"Release Notes" = 
"https://sqlparse.readthedocs.io/en/latest/changes/" +Source = "https://github.com/andialbrecht/sqlparse" +Tracker = "https://github.com/andialbrecht/sqlparse/issues" + +[project.scripts] +sqlformat = "sqlparse.__main__:main" + +[project.optional-dependencies] +dev = [ + "flake8", + "build", +] +test = [ + "pytest", + "pytest-cov", +] +doc = [ + "sphinx", +] + +[tool.flit.sdist] +include = [ + "docs/source/", + "docs/sqlformat.1", + "docs/Makefile", + "tests/*.py", "tests/files/*.sql", + "LICENSE", + "TODO", + "AUTHORS", + "CHANGELOG", + "Makefile", + "tox.ini", +] + +[tool.coverage.run] +omit = ["sqlparse/__main__.py"] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/setup.cfg new/sqlparse-0.4.4/setup.cfg --- old/sqlparse-0.4.3/setup.cfg 2022-09-23 20:40:44.461999000 +0200 +++ new/sqlparse-0.4.4/setup.cfg 1970-01-01 01:00:00.000000000 +0100 @@ -1,60 +0,0 @@ -[metadata] -name = sqlparse -version = attr: sqlparse.__version__ -url = https://github.com/andialbrecht/sqlparse -author = Andi Albrecht -author_email = albrecht.a...@gmail.com -description = A non-validating SQL parser. 
-long_description = file: README.rst -license = BSD-3-Clause -classifiers = - Development Status :: 5 - Production/Stable - Intended Audience :: Developers - License :: OSI Approved :: BSD License - Operating System :: OS Independent - Programming Language :: Python - Programming Language :: Python :: 3 - Programming Language :: Python :: 3 :: Only - Programming Language :: Python :: 3.5 - Programming Language :: Python :: 3.6 - Programming Language :: Python :: 3.7 - Programming Language :: Python :: 3.8 - Programming Language :: Python :: 3.9 - Programming Language :: Python :: 3.10 - Programming Language :: Python :: Implementation :: CPython - Programming Language :: Python :: Implementation :: PyPy - Topic :: Database - Topic :: Software Development -project_urls = - Documentation = https://sqlparse.readthedocs.io/ - Release Notes = https://sqlparse.readthedocs.io/en/latest/changes/ - Source = https://github.com/andialbrecht/sqlparse - Tracker = https://github.com/andialbrecht/sqlparse/issues - -[options] -python_requires = >=3.5 -packages = find: - -[options.packages.find] -exclude = tests - -[options.entry_points] -console_scripts = - sqlformat = sqlparse.__main__:main - -[tool:pytest] -xfail_strict = True - -[flake8] -extend-ignore = - E731 - -[coverage:run] -branch = False -omit = - sqlparse/__main__.py - -[egg_info] -tag_build = -tag_date = 0 - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/setup.py new/sqlparse-0.4.4/setup.py --- old/sqlparse-0.4.3/setup.py 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/setup.py 1970-01-01 01:00:00.000000000 +0100 @@ -1,12 +0,0 @@ -#!/usr/bin/env python -# -# Copyright (C) 2009-2020 the sqlparse authors and contributors -# <see AUTHORS file> -# -# This setup script is part of python-sqlparse and is released under -# the BSD License: https://opensource.org/licenses/BSD-3-Clause - -from setuptools import setup - - -setup() diff -urN '--exclude=CVS' 
'--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse/__init__.py new/sqlparse-0.4.4/sqlparse/__init__.py --- old/sqlparse-0.4.3/sqlparse/__init__.py 2022-09-23 20:29:53.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse/__init__.py 2023-04-18 10:24:53.735503000 +0200 @@ -16,7 +16,7 @@ from sqlparse import formatter -__version__ = '0.4.3' +__version__ = '0.4.4' __all__ = ['engine', 'filters', 'formatter', 'sql', 'tokens', 'cli'] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse/keywords.py new/sqlparse-0.4.4/sqlparse/keywords.py --- old/sqlparse-0.4.3/sqlparse/keywords.py 2022-09-10 10:36:58.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse/keywords.py 2023-04-18 10:23:13.328575600 +0200 @@ -5,108 +5,92 @@ # This module is part of python-sqlparse and is released under # the BSD License: https://opensource.org/licenses/BSD-3-Clause -import re - from sqlparse import tokens - -def is_keyword(value): - """Checks for a keyword. - - If the given value is in one of the KEYWORDS_* dictionary - it's considered a keyword. Otherwise tokens.Name is returned. 
- """ - val = value.upper() - return (KEYWORDS_COMMON.get(val) - or KEYWORDS_ORACLE.get(val) - or KEYWORDS_PLPGSQL.get(val) - or KEYWORDS_HQL.get(val) - or KEYWORDS_MSACCESS.get(val) - or KEYWORDS.get(val, tokens.Name)), value - - -SQL_REGEX = { - 'root': [ - (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), - (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), - - (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), - (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), - - (r'(\r\n|\r|\n)', tokens.Newline), - (r'\s+?', tokens.Whitespace), - - (r':=', tokens.Assignment), - (r'::', tokens.Punctuation), - - (r'\*', tokens.Wildcard), - - (r"`(``|[^`])*`", tokens.Name), - (r"´(´´|[^´])*´", tokens.Name), - (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), - - (r'\?', tokens.Name.Placeholder), - (r'%(\(\w+\))?s', tokens.Name.Placeholder), - (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), - - (r'\\\w+', tokens.Command), - (r'(NOT\s+)?(IN)\b', tokens.Operator.Comparison), - # FIXME(andi): VALUES shouldn't be listed here - # see https://github.com/andialbrecht/sqlparse/pull/64 - # AS and IN are special, it may be followed by a parenthesis, but - # are never functions, see issue183 and issue507 - (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword), - - (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), - - # see issue #39 - # Spaces around period `schema . name` are valid identifier - # TODO: Spaces before period not implemented - (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name'. 
- # FIXME(atronah): never match, - # because `re.match` doesn't work with look-behind regexp feature - (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' - (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func - (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), - (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float), - (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', - tokens.Number.Float), - (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), - (r"'(''|\\\\|\\'|[^'])*'", tokens.String.Single), - # not a real string literal in ANSI SQL: - (r'"(""|\\\\|\\"|[^"])*"', tokens.String.Symbol), - (r'(""|".*?[^\\]")', tokens.String.Symbol), - # sqlite names can be escaped with [square brackets]. left bracket - # cannot be preceded by word character or a right bracket -- - # otherwise it's probably an array index - (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), - (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?' - r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), - (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), - (r'NOT\s+NULL\b', tokens.Keyword), - (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword), - (r'UNION\s+ALL\b', tokens.Keyword), - (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), - (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), - (r'GROUP\s+BY\b', tokens.Keyword), - (r'ORDER\s+BY\b', tokens.Keyword), - (r'HANDLER\s+FOR\b', tokens.Keyword), - (r'(LATERAL\s+VIEW\s+)' - r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', - tokens.Keyword), - (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), - (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), - (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), - # Check for keywords, also returns tokens.Name if regex matches - # but the match isn't a keyword. 
- (r'[0-9_\w][_$#\w]*', is_keyword), - (r'[;:()\[\],\.]', tokens.Punctuation), - (r'[<>=~!]+', tokens.Operator.Comparison), - (r'[+/@#%^&|^-]+', tokens.Operator), - ]} - -FLAGS = re.IGNORECASE | re.UNICODE -SQL_REGEX = [(re.compile(rx, FLAGS).match, tt) for rx, tt in SQL_REGEX['root']] +# object() only supports "is" and is useful as a marker +# use this marker to specify that the given regex in SQL_REGEX +# shall be processed further through a lookup in the KEYWORDS dictionaries +PROCESS_AS_KEYWORD = object() + + +SQL_REGEX = [ + (r'(--|# )\+.*?(\r\n|\r|\n|$)', tokens.Comment.Single.Hint), + (r'/\*\+[\s\S]*?\*/', tokens.Comment.Multiline.Hint), + + (r'(--|# ).*?(\r\n|\r|\n|$)', tokens.Comment.Single), + (r'/\*[\s\S]*?\*/', tokens.Comment.Multiline), + + (r'(\r\n|\r|\n)', tokens.Newline), + (r'\s+?', tokens.Whitespace), + + (r':=', tokens.Assignment), + (r'::', tokens.Punctuation), + + (r'\*', tokens.Wildcard), + + (r"`(``|[^`])*`", tokens.Name), + (r"´(´´|[^´])*´", tokens.Name), + (r'((?<!\S)\$(?:[_A-ZÀ-Ü]\w*)?\$)[\s\S]*?\1', tokens.Literal), + + (r'\?', tokens.Name.Placeholder), + (r'%(\(\w+\))?s', tokens.Name.Placeholder), + (r'(?<!\w)[$:?]\w+', tokens.Name.Placeholder), + + (r'\\\w+', tokens.Command), + + # FIXME(andi): VALUES shouldn't be listed here + # see https://github.com/andialbrecht/sqlparse/pull/64 + # AS and IN are special, it may be followed by a parenthesis, but + # are never functions, see issue183 and issue507 + (r'(CASE|IN|VALUES|USING|FROM|AS)\b', tokens.Keyword), + + (r'(@|##|#)[A-ZÀ-Ü]\w+', tokens.Name), + + # see issue #39 + # Spaces around period `schema . name` are valid identifier + # TODO: Spaces before period not implemented + (r'[A-ZÀ-Ü]\w*(?=\s*\.)', tokens.Name), # 'Name'. 
+ # FIXME(atronah): never match, + # because `re.match` doesn't work with look-behind regexp feature + (r'(?<=\.)[A-ZÀ-Ü]\w*', tokens.Name), # .'Name' + (r'[A-ZÀ-Ü]\w*(?=\()', tokens.Name), # side effect: change kw to func + (r'-?0x[\dA-F]+', tokens.Number.Hexadecimal), + (r'-?\d+(\.\d+)?E-?\d+', tokens.Number.Float), + (r'(?![_A-ZÀ-Ü])-?(\d+(\.\d*)|\.\d+)(?![_A-ZÀ-Ü])', + tokens.Number.Float), + (r'(?![_A-ZÀ-Ü])-?\d+(?![_A-ZÀ-Ü])', tokens.Number.Integer), + (r"'(''|\\'|[^'])*'", tokens.String.Single), + # not a real string literal in ANSI SQL: + (r'"(""|\\"|[^"])*"', tokens.String.Symbol), + (r'(""|".*?[^\\]")', tokens.String.Symbol), + # sqlite names can be escaped with [square brackets]. left bracket + # cannot be preceded by word character or a right bracket -- + # otherwise it's probably an array index + (r'(?<![\w\])])(\[[^\]\[]+\])', tokens.Name), + (r'((LEFT\s+|RIGHT\s+|FULL\s+)?(INNER\s+|OUTER\s+|STRAIGHT\s+)?' + r'|(CROSS\s+|NATURAL\s+)?)?JOIN\b', tokens.Keyword), + (r'END(\s+IF|\s+LOOP|\s+WHILE)?\b', tokens.Keyword), + (r'NOT\s+NULL\b', tokens.Keyword), + (r'NULLS\s+(FIRST|LAST)\b', tokens.Keyword), + (r'UNION\s+ALL\b', tokens.Keyword), + (r'CREATE(\s+OR\s+REPLACE)?\b', tokens.Keyword.DDL), + (r'DOUBLE\s+PRECISION\b', tokens.Name.Builtin), + (r'GROUP\s+BY\b', tokens.Keyword), + (r'ORDER\s+BY\b', tokens.Keyword), + (r'HANDLER\s+FOR\b', tokens.Keyword), + (r'(LATERAL\s+VIEW\s+)' + r'(EXPLODE|INLINE|PARSE_URL_TUPLE|POSEXPLODE|STACK)\b', + tokens.Keyword), + (r"(AT|WITH')\s+TIME\s+ZONE\s+'[^']+'", tokens.Keyword.TZCast), + (r'(NOT\s+)?(LIKE|ILIKE|RLIKE)\b', tokens.Operator.Comparison), + (r'(NOT\s+)?(REGEXP)\b', tokens.Operator.Comparison), + # Check for keywords, also returns tokens.Name if regex matches + # but the match isn't a keyword. 
+ (r'\w[$#\w]*', PROCESS_AS_KEYWORD), + (r'[;:()\[\],\.]', tokens.Punctuation), + (r'[<>=~!]+', tokens.Operator.Comparison), + (r'[+/@#%^&|^-]+', tokens.Operator), +] KEYWORDS = { 'ABORT': tokens.Keyword, diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse/lexer.py new/sqlparse-0.4.4/sqlparse/lexer.py --- old/sqlparse-0.4.3/sqlparse/lexer.py 2022-09-10 10:29:34.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse/lexer.py 2023-03-20 08:41:41.867994500 +0100 @@ -6,6 +6,7 @@ # the BSD License: https://opensource.org/licenses/BSD-3-Clause """SQL Lexer""" +import re # This code is based on the SqlLexer in pygments. # http://pygments.org/ @@ -14,18 +15,90 @@ from io import TextIOBase -from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX +from sqlparse import tokens, keywords from sqlparse.utils import consume class Lexer: - """Lexer - Empty class. Leaving for backwards-compatibility - """ + """The Lexer supports configurable syntax. + To add support for additional keywords, use the `add_keywords` method.""" + + _default_intance = None + + # Development notes: + # - This class is prepared to be able to support additional SQL dialects + # in the future by adding additional functions that take the place of + # the function default_initialization() + # - The lexer class uses an explicit singleton behavior with the + # instance-getter method get_default_instance(). This mechanism has + # the advantage that the call signature of the entry-points to the + # sqlparse library are not affected. Also, usage of sqlparse in third + # party code does not need to be adapted. On the other hand, singleton + # behavior is not thread safe, and the current implementation does not + # easily allow for multiple SQL dialects to be parsed in the same + # process. 
Such behavior can be supported in the future by passing a + # suitably initialized lexer object as an additional parameter to the + # entry-point functions (such as `parse`). Code will need to be written + # to pass down and utilize such an object. The current implementation + # is prepared to support this thread safe approach without the + # default_instance part needing to change interface. + + @classmethod + def get_default_instance(cls): + """Returns the lexer instance used internally + by the sqlparse core functions.""" + if cls._default_intance is None: + cls._default_intance = cls() + cls._default_intance.default_initialization() + return cls._default_intance + + def default_initialization(self): + """Initialize the lexer with default dictionaries. + Useful if you need to revert custom syntax settings.""" + self.clear() + self.set_SQL_REGEX(keywords.SQL_REGEX) + self.add_keywords(keywords.KEYWORDS_COMMON) + self.add_keywords(keywords.KEYWORDS_ORACLE) + self.add_keywords(keywords.KEYWORDS_PLPGSQL) + self.add_keywords(keywords.KEYWORDS_HQL) + self.add_keywords(keywords.KEYWORDS_MSACCESS) + self.add_keywords(keywords.KEYWORDS) + + def clear(self): + """Clear all syntax configurations. + Useful if you want to load a reduced set of syntax configurations. + After this call, regexps and keyword dictionaries need to be loaded + to make the lexer functional again.""" + self._SQL_REGEX = [] + self._keywords = [] + + def set_SQL_REGEX(self, SQL_REGEX): + """Set the list of regex that will parse the SQL.""" + FLAGS = re.IGNORECASE | re.UNICODE + self._SQL_REGEX = [ + (re.compile(rx, FLAGS).match, tt) + for rx, tt in SQL_REGEX + ] + + def add_keywords(self, keywords): + """Add keyword dictionaries. Keywords are looked up in the same order + that dictionaries were added.""" + self._keywords.append(keywords) + + def is_keyword(self, value): + """Checks for a keyword. + + If the given value is in one of the KEYWORDS_* dictionary + it's considered a keyword. 
Otherwise, tokens.Name is returned. + """ + val = value.upper() + for kwdict in self._keywords: + if val in kwdict: + return kwdict[val], value + else: + return tokens.Name, value - @staticmethod - def get_tokens(text, encoding=None): + def get_tokens(self, text, encoding=None): """ Return an iterable of (tokentype, value) pairs generated from `text`. If `unfiltered` is set to `True`, the filtering mechanism @@ -57,15 +130,15 @@ iterable = enumerate(text) for pos, char in iterable: - for rexmatch, action in SQL_REGEX: + for rexmatch, action in self._SQL_REGEX: m = rexmatch(text, pos) if not m: continue elif isinstance(action, tokens._TokenType): yield action, m.group() - elif callable(action): - yield action(m.group()) + elif action is keywords.PROCESS_AS_KEYWORD: + yield self.is_keyword(m.group()) consume(iterable, m.end() - pos - 1) break @@ -79,4 +152,4 @@ Tokenize *sql* using the :class:`Lexer` and return a 2-tuple stream of ``(token type, value)`` items. """ - return Lexer().get_tokens(sql, encoding) + return Lexer.get_default_instance().get_tokens(sql, encoding) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse/sql.py new/sqlparse-0.4.4/sqlparse/sql.py --- old/sqlparse-0.4.3/sqlparse/sql.py 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse/sql.py 2023-03-20 08:41:41.868462300 +0100 @@ -413,27 +413,28 @@ Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(skip_cm=True) - if first_token is None: + token = self.token_first(skip_cm=True) + if token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. 
return 'UNKNOWN' - elif first_token.ttype in (T.Keyword.DML, T.Keyword.DDL): - return first_token.normalized + elif token.ttype in (T.Keyword.DML, T.Keyword.DDL): + return token.normalized - elif first_token.ttype == T.Keyword.CTE: + elif token.ttype == T.Keyword.CTE: # The WITH keyword should be followed by either an Identifier or # an IdentifierList containing the CTE definitions; the actual # DML keyword (e.g. SELECT, INSERT) will follow next. - fidx = self.token_index(first_token) - tidx, token = self.token_next(fidx, skip_ws=True) - if isinstance(token, (Identifier, IdentifierList)): - _, dml_keyword = self.token_next(tidx, skip_ws=True) + tidx = self.token_index(token) + while tidx is not None: + tidx, token = self.token_next(tidx, skip_ws=True) + if isinstance(token, (Identifier, IdentifierList)): + tidx, token = self.token_next(tidx, skip_ws=True) - if dml_keyword is not None \ - and dml_keyword.ttype == T.Keyword.DML: - return dml_keyword.normalized + if token is not None \ + and token.ttype == T.Keyword.DML: + return token.normalized # Hmm, probably invalid syntax, so return unknown. return 'UNKNOWN' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse.egg-info/PKG-INFO new/sqlparse-0.4.4/sqlparse.egg-info/PKG-INFO --- old/sqlparse-0.4.3/sqlparse.egg-info/PKG-INFO 2022-09-23 20:40:44.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse.egg-info/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 @@ -1,111 +0,0 @@ -Metadata-Version: 2.1 -Name: sqlparse -Version: 0.4.3 -Summary: A non-validating SQL parser. 
-Home-page: https://github.com/andialbrecht/sqlparse -Author: Andi Albrecht -Author-email: albrecht.a...@gmail.com -License: BSD-3-Clause -Project-URL: Documentation, https://sqlparse.readthedocs.io/ -Project-URL: Release Notes, https://sqlparse.readthedocs.io/en/latest/changes/ -Project-URL: Source, https://github.com/andialbrecht/sqlparse -Project-URL: Tracker, https://github.com/andialbrecht/sqlparse/issues -Classifier: Development Status :: 5 - Production/Stable -Classifier: Intended Audience :: Developers -Classifier: License :: OSI Approved :: BSD License -Classifier: Operating System :: OS Independent -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3 :: Only -Classifier: Programming Language :: Python :: 3.5 -Classifier: Programming Language :: Python :: 3.6 -Classifier: Programming Language :: Python :: 3.7 -Classifier: Programming Language :: Python :: 3.8 -Classifier: Programming Language :: Python :: 3.9 -Classifier: Programming Language :: Python :: 3.10 -Classifier: Programming Language :: Python :: Implementation :: CPython -Classifier: Programming Language :: Python :: Implementation :: PyPy -Classifier: Topic :: Database -Classifier: Topic :: Software Development -Requires-Python: >=3.5 -License-File: LICENSE -License-File: AUTHORS - -python-sqlparse - Parse SQL statements -====================================== - -|buildstatus|_ -|coverage|_ -|docs|_ -|packageversion|_ - -.. docincludebegin - -sqlparse is a non-validating SQL parser for Python. -It provides support for parsing, splitting and formatting SQL statements. - -The module is compatible with Python 3.5+ and released under the terms of the -`New BSD license <https://opensource.org/licenses/BSD-3-Clause>`_. - -Visit the project page at https://github.com/andialbrecht/sqlparse for -further information about this project. - - -Quick Start ------------ - -.. 
code-block:: sh - - $ pip install sqlparse - -.. code-block:: python - - >>> import sqlparse - - >>> # Split a string containing two SQL statements: - >>> raw = 'select * from foo; select * from bar;' - >>> statements = sqlparse.split(raw) - >>> statements - ['select * from foo;', 'select * from bar;'] - - >>> # Format the first statement and print it out: - >>> first = statements[0] - >>> print(sqlparse.format(first, reindent=True, keyword_case='upper')) - SELECT * - FROM foo; - - >>> # Parsing a SQL statement: - >>> parsed = sqlparse.parse('select * from foo')[0] - >>> parsed.tokens - [<DML 'select' at 0x7f22c5e15368>, <Whitespace ' ' at 0x7f22c5e153b0>, <Wildcard '*' … ] - >>> - -Links ------ - -Project page - https://github.com/andialbrecht/sqlparse - -Bug tracker - https://github.com/andialbrecht/sqlparse/issues - -Documentation - https://sqlparse.readthedocs.io/ - -Online Demo - https://sqlformat.org/ - - -sqlparse is licensed under the BSD license. - -Parts of the code are based on pygments written by Georg Brandl and others. -pygments-Homepage: http://pygments.org/ - -.. |buildstatus| image:: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml/badge.svg -.. _buildstatus: https://github.com/andialbrecht/sqlparse/actions/workflows/python-app.yml -.. |coverage| image:: https://codecov.io/gh/andialbrecht/sqlparse/branch/master/graph/badge.svg -.. _coverage: https://codecov.io/gh/andialbrecht/sqlparse -.. |docs| image:: https://readthedocs.org/projects/sqlparse/badge/?version=latest -.. _docs: https://sqlparse.readthedocs.io/en/latest/?badge=latest -.. |packageversion| image:: https://img.shields.io/pypi/v/sqlparse?color=%2334D058&label=pypi%20package -.. 
_packageversion: https://pypi.org/project/sqlparse diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse.egg-info/SOURCES.txt new/sqlparse-0.4.4/sqlparse.egg-info/SOURCES.txt --- old/sqlparse-0.4.3/sqlparse.egg-info/SOURCES.txt 2022-09-23 20:40:44.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse.egg-info/SOURCES.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,74 +0,0 @@ -AUTHORS -CHANGELOG -LICENSE -MANIFEST.in -Makefile -README.rst -TODO -setup.cfg -setup.py -tox.ini -docs/Makefile -docs/sqlformat.1 -docs/source/analyzing.rst -docs/source/api.rst -docs/source/changes.rst -docs/source/conf.py -docs/source/index.rst -docs/source/indices.rst -docs/source/intro.rst -docs/source/license.rst -docs/source/ui.rst -sqlparse/__init__.py -sqlparse/__main__.py -sqlparse/cli.py -sqlparse/exceptions.py -sqlparse/formatter.py -sqlparse/keywords.py -sqlparse/lexer.py -sqlparse/sql.py -sqlparse/tokens.py -sqlparse/utils.py -sqlparse.egg-info/PKG-INFO -sqlparse.egg-info/SOURCES.txt -sqlparse.egg-info/dependency_links.txt -sqlparse.egg-info/entry_points.txt -sqlparse.egg-info/top_level.txt -sqlparse/engine/__init__.py -sqlparse/engine/filter_stack.py -sqlparse/engine/grouping.py -sqlparse/engine/statement_splitter.py -sqlparse/filters/__init__.py -sqlparse/filters/aligned_indent.py -sqlparse/filters/others.py -sqlparse/filters/output.py -sqlparse/filters/reindent.py -sqlparse/filters/right_margin.py -sqlparse/filters/tokens.py -tests/__init__.py -tests/conftest.py -tests/test_cli.py -tests/test_format.py -tests/test_grouping.py -tests/test_keywords.py -tests/test_parse.py -tests/test_regressions.py -tests/test_split.py -tests/test_tokenize.py -tests/test_utils.py -tests/files/_Make_DirEntry.sql -tests/files/begintag.sql -tests/files/begintag_2.sql -tests/files/casewhen_procedure.sql -tests/files/dashcomment.sql -tests/files/encoding_gbk.sql -tests/files/encoding_utf8.sql -tests/files/function.sql 
-tests/files/function_psql.sql -tests/files/function_psql2.sql -tests/files/function_psql3.sql -tests/files/function_psql4.sql -tests/files/huge_select.sql -tests/files/mysql_handler.sql -tests/files/stream.sql -tests/files/test_cp1251.sql \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse.egg-info/dependency_links.txt new/sqlparse-0.4.4/sqlparse.egg-info/dependency_links.txt --- old/sqlparse-0.4.3/sqlparse.egg-info/dependency_links.txt 2022-09-23 20:40:44.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse.egg-info/dependency_links.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse.egg-info/entry_points.txt new/sqlparse-0.4.4/sqlparse.egg-info/entry_points.txt --- old/sqlparse-0.4.3/sqlparse.egg-info/entry_points.txt 2022-09-23 20:40:44.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse.egg-info/entry_points.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,2 +0,0 @@ -[console_scripts] -sqlformat = sqlparse.__main__:main diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/sqlparse.egg-info/top_level.txt new/sqlparse-0.4.4/sqlparse.egg-info/top_level.txt --- old/sqlparse-0.4.3/sqlparse.egg-info/top_level.txt 2022-09-23 20:40:44.000000000 +0200 +++ new/sqlparse-0.4.4/sqlparse.egg-info/top_level.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -sqlparse diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/tests/test_grouping.py new/sqlparse-0.4.4/tests/test_grouping.py --- old/sqlparse-0.4.3/tests/test_grouping.py 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/tests/test_grouping.py 2022-12-30 12:03:50.611888000 +0100 @@ -376,20 +376,10 @@ # issue183 p = sqlparse.parse('in(1, 2)')[0] assert len(p.tokens) == 2 - assert p.tokens[0].ttype == 
T.Comparison + assert p.tokens[0].ttype == T.Keyword assert isinstance(p.tokens[1], sql.Parenthesis) -def test_in_comparison(): - # issue566 - p = sqlparse.parse('a in (1, 2)')[0] - assert len(p.tokens) == 1 - assert isinstance(p.tokens[0], sql.Comparison) - assert len(p.tokens[0].tokens) == 5 - assert p.tokens[0].left.value == 'a' - assert p.tokens[0].right.value == '(1, 2)' - - def test_grouping_varchar(): p = sqlparse.parse('"text" Varchar(50) NOT NULL')[0] assert isinstance(p.tokens[2], sql.Function) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/tests/test_keywords.py new/sqlparse-0.4.4/tests/test_keywords.py --- old/sqlparse-0.4.3/tests/test_keywords.py 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/tests/test_keywords.py 2023-03-20 08:41:41.869629000 +0100 @@ -1,7 +1,7 @@ import pytest from sqlparse import tokens -from sqlparse.keywords import SQL_REGEX +from sqlparse.lexer import Lexer class TestSQLREGEX: @@ -9,5 +9,5 @@ '1.', '-1.', '.1', '-.1']) def test_float_numbers(self, number): - ttype = next(tt for action, tt in SQL_REGEX if action(number)) + ttype = next(tt for action, tt in Lexer.get_default_instance()._SQL_REGEX if action(number)) assert tokens.Number.Float == ttype diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/tests/test_parse.py new/sqlparse-0.4.4/tests/test_parse.py --- old/sqlparse-0.4.3/tests/test_parse.py 2022-09-10 10:05:08.000000000 +0200 +++ new/sqlparse-0.4.4/tests/test_parse.py 2023-03-20 08:41:41.870118600 +0100 @@ -4,7 +4,8 @@ import pytest import sqlparse -from sqlparse import sql, tokens as T +from sqlparse import sql, tokens as T, keywords +from sqlparse.lexer import Lexer def test_parse_tokenize(): @@ -489,3 +490,79 @@ T.Newline, T.Newline, T.Punctuation] + + +def test_configurable_keywords(): + sql = """select * from foo BACON SPAM EGGS;""" + tokens = sqlparse.parse(sql)[0] + + assert list( + 
(t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo BACON"), + (None, "SPAM EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + Lexer.get_default_instance().add_keywords( + { + "BACON": sqlparse.tokens.Name.Builtin, + "SPAM": sqlparse.tokens.Keyword, + "EGGS": sqlparse.tokens.Keyword, + } + ) + + tokens = sqlparse.parse(sql)[0] + + # reset the syntax for later tests. + Lexer.get_default_instance().default_initialization() + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + ) == [ + (sqlparse.tokens.Keyword.DML, "select"), + (sqlparse.tokens.Wildcard, "*"), + (sqlparse.tokens.Keyword, "from"), + (None, "foo"), + (sqlparse.tokens.Name.Builtin, "BACON"), + (sqlparse.tokens.Keyword, "SPAM"), + (sqlparse.tokens.Keyword, "EGGS"), + (sqlparse.tokens.Punctuation, ";"), + ] + + +def test_configurable_regex(): + lex = Lexer.get_default_instance() + lex.clear() + + my_regex = (r"ZORDER\s+BY\b", sqlparse.tokens.Keyword) + + lex.set_SQL_REGEX( + keywords.SQL_REGEX[:38] + + [my_regex] + + keywords.SQL_REGEX[38:] + ) + lex.add_keywords(keywords.KEYWORDS_COMMON) + lex.add_keywords(keywords.KEYWORDS_ORACLE) + lex.add_keywords(keywords.KEYWORDS_PLPGSQL) + lex.add_keywords(keywords.KEYWORDS_HQL) + lex.add_keywords(keywords.KEYWORDS_MSACCESS) + lex.add_keywords(keywords.KEYWORDS) + + tokens = sqlparse.parse("select * from foo zorder by bar;")[0] + + # reset the syntax for later tests. 
+ Lexer.get_default_instance().default_initialization() + + assert list( + (t.ttype, t.value) + for t in tokens + if t.ttype not in sqlparse.tokens.Whitespace + )[4] == (sqlparse.tokens.Keyword, "zorder by") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/tests/test_regressions.py new/sqlparse-0.4.4/tests/test_regressions.py --- old/sqlparse-0.4.3/tests/test_regressions.py 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/tests/test_regressions.py 2023-03-20 08:41:41.870934000 +0100 @@ -427,3 +427,12 @@ 'grant foo to user1@`myhost`; grant bar to user1@`myhost`;') assert len(splitted) == 2 assert splitted[-1] == 'grant bar to user1@`myhost`;' + + +def test_comment_between_cte_clauses_issue632(): + p, = sqlparse.parse(""" + WITH foo AS (), + -- A comment before baz subquery + baz AS () + SELECT * FROM baz;""") + assert p.get_type() == "SELECT" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/sqlparse-0.4.3/tests/test_split.py new/sqlparse-0.4.4/tests/test_split.py --- old/sqlparse-0.4.3/tests/test_split.py 2022-09-06 21:32:40.000000000 +0200 +++ new/sqlparse-0.4.4/tests/test_split.py 2023-04-18 10:23:13.329035800 +0200 @@ -18,8 +18,8 @@ def test_split_backslash(): - stmts = sqlparse.parse(r"select '\\'; select '\''; select '\\\'';") - assert len(stmts) == 3 + stmts = sqlparse.parse("select '\'; select '\'';") + assert len(stmts) == 2 @pytest.mark.parametrize('fn', ['function.sql',