commit python-lark-parser for openSUSE:Factory

root Mon, 04 Nov 2019 08:10:20 -0800

Hello community,

here is the log from the commit of package python-lark-parser for 
openSUSE:Factory checked in at 2019-11-04 17:09:17
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-lark-parser (Old)
 and      /work/SRC/openSUSE:Factory/.python-lark-parser.new.2990 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-lark-parser"

Mon Nov  4 17:09:17 2019 rev:6 rq:743738 version:0.7.7

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-lark-parser/python-lark-parser.changes    
2019-09-23 12:08:14.173897188 +0200
+++ 
/work/SRC/openSUSE:Factory/.python-lark-parser.new.2990/python-lark-parser.changes
  2019-11-04 17:09:19.620448337 +0100
@@ -1,0 +2,10 @@
+Sun Oct 27 14:28:26 UTC 2019 - Lars Vogdt <l...@linux-schulserver.de>
+
+- Update to 0.7.7:
+  * Fixed a bug in Earley where running it from different threads 
+    produced bad results
+  * Improved error reporting when using LALR
+  * Added 'edit_terminals' option, to allow programmatic manipulation
+    of terminals, for example to support keywords in different languages.
+
+-------------------------------------------------------------------

Old:
----
  lark-parser-0.7.5.tar.gz

New:
----
  lark-0.7.7.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-lark-parser.spec ++++++
--- /var/tmp/diff_new_pack.Z7O5HJ/_old  2019-11-04 17:09:20.208448966 +0100
+++ /var/tmp/diff_new_pack.Z7O5HJ/_new  2019-11-04 17:09:20.212448970 +0100
@@ -18,13 +18,13 @@
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-lark-parser
-Version:        0.7.5
+Version:        0.7.7
 Release:        0
 Summary:        A parsing library for Python
 License:        MIT
 Group:          Development/Languages/Python
-URL:            https://github.com/erezsh/lark
-Source:         
https://github.com/lark-parser/lark/archive/%{version}.tar.gz#/lark-parser-%{version}.tar.gz
+URL:            https://github.com/lark-parser
+Source:         
https://github.com/lark-parser/lark/archive/%{version}.tar.gz#/lark-%{version}.tar.gz
 # extracted test grammars from nearley -> https://github.com/kach/nearley
 Source1:        testdata.tar.gz
 BuildRequires:  %{python_module Js2Py}

++++++ lark-parser-0.7.5.tar.gz -> lark-0.7.7.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lark-0.7.5/docs/grammar.md 
new/lark-0.7.7/docs/grammar.md
--- old/lark-0.7.5/docs/grammar.md      2019-09-06 07:18:42.000000000 +0200
+++ new/lark-0.7.7/docs/grammar.md      2019-10-03 10:29:49.000000000 +0200
@@ -1,5 +1,13 @@
 # Grammar Reference
 
+Table of contents:
+
+1. [Definitions](#defs)
+1. [Terminals](#terms)
+1. [Rules](#rules)
+1. [Directives](#dirs)
+
+<a name="defs"></a>
 ## Definitions
 
 **A grammar** is a list of rules and terminals, that together define a 
language.
@@ -25,6 +33,7 @@
 Names of rules are always in lowercase, while names of terminals are always in 
uppercase. This distinction has practical effects, for the shape of the 
generated parse-tree, and the automatic construction of the lexer (aka 
tokenizer, or scanner).
 
 
+<a name="terms"></a>
 ## Terminals
 
 Terminals are used to match text into symbols. They can be defined as a 
combination of literals and other terminals.
@@ -70,6 +79,53 @@
 SQL_SELECT: "select"i
 ```
 
+### Regular expressions & Ambiguity
+
+Each terminal is eventually compiled to a regular expression. All the 
operators and references inside it are mapped to their respective expressions.
+
+For example, in the following grammar, `A1` and `A2` are equivalent:
+```perl
+A1: "a" | "b"
+A2: /a|b/
+```
+
+This means that inside terminals, Lark cannot detect or resolve ambiguity, 
even when using Earley.
+
+For example, for this grammar:
+```perl
+start           : (A | B)+
+A               : "a" | "ab"
+B               : "b"
+```
+We get this behavior:
+
+```bash
+>>> p.parse("ab")
+Tree(start, [Token(A, 'a'), Token(B, 'b')])
+```
+
+This is happening because Python's regex engine always returns the first 
matching option.
+
+If you find yourself in this situation, the recommended solution is to use 
rules instead.
+
+Example:
+
+```python
+>>> p = Lark("""start: (a | b)+
+...             !a: "a" | "ab"
+...             !b: "b"
+...             """, ambiguity="explicit")
+>>> print(p.parse("ab").pretty())
+_ambig
+  start
+    a   ab
+  start
+    a   a
+    b   b
+```
+
+
+<a name="rules"></a>
 ## Rules
 
 **Syntax:**
@@ -114,6 +170,7 @@
 
 Priority can be either positive or negative. If not specified for a terminal, 
it's assumed to be 1 (i.e. the default).
 
+<a name="dirs"></a>
 ## Directives
 
 ### %ignore
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lark-0.7.5/docs/json_tutorial.md 
new/lark-0.7.7/docs/json_tutorial.md
--- old/lark-0.7.5/docs/json_tutorial.md        2019-09-06 07:18:42.000000000 
+0200
+++ new/lark-0.7.7/docs/json_tutorial.md        2019-10-03 10:29:49.000000000 
+0200
@@ -230,7 +230,8 @@
 class MyTransformer(Transformer):
     def list(self, items):
         return list(items)
-    def pair(self, (k,v)):
+    def pair(self, key_value):
+        k, v = key_value
         return k, v
     def dict(self, items):
         return dict(items)
@@ -251,9 +252,11 @@
 from lark import Transformer
 
 class TreeToJson(Transformer):
-    def string(self, (s,)):
+    def string(self, s):
+        (s,) = s
         return s[1:-1]
-    def number(self, (n,)):
+    def number(self, n):
+        (n,) = n
         return float(n)
 
     list = list
@@ -315,9 +318,11 @@
     """
 
 class TreeToJson(Transformer):
-    def string(self, (s,)):
+    def string(self, s):
+        (s,) = s
         return s[1:-1]
-    def number(self, (n,)):
+    def number(self, n):
+        (n,) = n
         return float(n)
 
     list = list
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lark-0.7.5/lark/__init__.py 
new/lark-0.7.7/lark/__init__.py
--- old/lark-0.7.5/lark/__init__.py     2019-09-06 07:18:42.000000000 +0200
+++ new/lark-0.7.7/lark/__init__.py     2019-10-03 10:29:49.000000000 +0200
@@ -5,4 +5,4 @@
 from .lexer import Token
 from .lark import Lark
 
-__version__ = "0.7.5"
+__version__ = "0.7.7"
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lark-0.7.5/lark/lark.py new/lark-0.7.7/lark/lark.py
--- old/lark-0.7.5/lark/lark.py 2019-09-06 07:18:42.000000000 +0200
+++ new/lark-0.7.7/lark/lark.py 2019-10-03 10:29:49.000000000 +0200
@@ -69,6 +69,7 @@
         'propagate_positions': False,
         'lexer_callbacks': {},
         'maybe_placeholders': False,
+        'edit_terminals': None,
     }
 
     def __init__(self, options_dict):
@@ -205,6 +206,10 @@
         # Compile the EBNF grammar into BNF
         self.terminals, self.rules, self.ignore_tokens = 
self.grammar.compile(self.options.start)
 
+        if self.options.edit_terminals:
+            for t in self.terminals:
+                self.options.edit_terminals(t)
+
         self._terminals_dict = {t.name:t for t in self.terminals}
 
         # If the user asked to invert the priorities, negate them all here.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lark-0.7.5/lark/lexer.py new/lark-0.7.7/lark/lexer.py
--- old/lark-0.7.5/lark/lexer.py        2019-09-06 07:18:42.000000000 +0200
+++ new/lark-0.7.7/lark/lexer.py        2019-10-03 10:29:49.000000000 +0200
@@ -3,7 +3,7 @@
 import re
 
 from .utils import Str, classify, get_regexp_width, Py36, Serialize
-from .exceptions import UnexpectedCharacters, LexError
+from .exceptions import UnexpectedCharacters, LexError, UnexpectedToken
 
 ###{standalone
 
@@ -43,7 +43,7 @@
     __serialize_fields__ = 'value', 'flags'
 
     type = "str"
-    
+
     def to_regexp(self):
         return self._get_flags(re.escape(self.value))
 
@@ -166,36 +166,33 @@
 
         while line_ctr.char_pos < len(stream):
             lexer = self.lexer
-            for mre, type_from_index in lexer.mres:
-                m = mre.match(stream, line_ctr.char_pos)
-                if not m:
-                    continue
-
-                t = None
-                value = m.group(0)
-                type_ = type_from_index[m.lastindex]
-                if type_ not in ignore_types:
-                    t = Token(type_, value, line_ctr.char_pos, line_ctr.line, 
line_ctr.column)
-                    if t.type in lexer.callback:
-                        t = lexer.callback[t.type](t)
-                        if not isinstance(t, Token):
-                            raise ValueError("Callbacks must return a token 
(returned %r)" % t)
-                    last_token = t
-                    yield t
-                else:
-                    if type_ in lexer.callback:
-                        t = Token(type_, value, line_ctr.char_pos, 
line_ctr.line, line_ctr.column)
-                        lexer.callback[type_](t)
-
-                line_ctr.feed(value, type_ in newline_types)
-                if t:
-                    t.end_line = line_ctr.line
-                    t.end_column = line_ctr.column
+            res = lexer.match(stream, line_ctr.char_pos)
+            if not res:
+                allowed = {v for m, tfi in lexer.mres for v in tfi.values()} - 
ignore_types
+                if not allowed:
+                    allowed = {"<END-OF-FILE>"}
+                raise UnexpectedCharacters(stream, line_ctr.char_pos, 
line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, 
token_history=last_token and [last_token])
 
-                break
+            value, type_ = res
+
+            t = None
+            if type_ not in ignore_types:
+                t = Token(type_, value, line_ctr.char_pos, line_ctr.line, 
line_ctr.column)
+                if t.type in lexer.callback:
+                    t = lexer.callback[t.type](t)
+                    if not isinstance(t, Token):
+                        raise ValueError("Callbacks must return a token 
(returned %r)" % t)
+                last_token = t
+                yield t
             else:
-                allowed = {v for m, tfi in lexer.mres for v in tfi.values()}
-                raise UnexpectedCharacters(stream, line_ctr.char_pos, 
line_ctr.line, line_ctr.column, allowed=allowed, state=self.state, 
token_history=last_token and [last_token])
+                if type_ in lexer.callback:
+                    t = Token(type_, value, line_ctr.char_pos, line_ctr.line, 
line_ctr.column)
+                    lexer.callback[type_](t)
+
+            line_ctr.feed(value, type_ in newline_types)
+            if t:
+                t.end_line = line_ctr.line
+                t.end_column = line_ctr.column
 
 
 class UnlessCallback:
@@ -330,6 +327,11 @@
 
         self.mres = build_mres(terminals)
 
+    def match(self, stream, pos):
+        for mre, type_from_index in self.mres:
+            m = mre.match(stream, pos)
+            if m:
+                return m.group(0), type_from_index[m.lastindex]
 
     def lex(self, stream):
         return _Lex(self).lex(stream, self.newline_types, self.ignore_types)
@@ -367,9 +369,21 @@
 
     def lex(self, stream):
         l = _Lex(self.lexers[self.parser_state], self.parser_state)
-        for x in l.lex(stream, self.root_lexer.newline_types, 
self.root_lexer.ignore_types):
-            yield x
-            l.lexer = self.lexers[self.parser_state]
-            l.state = self.parser_state
+        try:
+            for x in l.lex(stream, self.root_lexer.newline_types, 
self.root_lexer.ignore_types):
+                yield x
+                l.lexer = self.lexers[self.parser_state]
+                l.state = self.parser_state
+        except UnexpectedCharacters as e:
+            # In the contextual lexer, UnexpectedCharacters can mean that the 
terminal is defined,
+            # but not in the current context.
+            # This tests the input against the global context, to provide a 
nicer error.
+            root_match = self.root_lexer.match(stream, e.pos_in_stream)
+            if not root_match:
+                raise
+
+            value, type_ = root_match
+            t = Token(type_, value, e.pos_in_stream, e.line, e.column)
+            raise UnexpectedToken(t, e.allowed)
 
 ###}
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/lark-0.7.5/lark/parsers/earley.py 
new/lark-0.7.7/lark/parsers/earley.py
--- old/lark-0.7.5/lark/parsers/earley.py       2019-09-06 07:18:42.000000000 
+0200
+++ new/lark-0.7.7/lark/parsers/earley.py       2019-10-03 10:29:49.000000000 
+0200
@@ -46,12 +46,8 @@
             #  skip the extra tree walk. We'll also skip this if the user just 
didn't specify priorities
             #  on any rules.
             if self.forest_sum_visitor is None and rule.options and 
rule.options.priority is not None:
-                self.forest_sum_visitor = ForestSumVisitor()
+                self.forest_sum_visitor = ForestSumVisitor
 
-        if resolve_ambiguity:
-            self.forest_tree_visitor = ForestToTreeVisitor(self.callbacks, 
self.forest_sum_visitor)
-        else:
-            self.forest_tree_visitor = 
ForestToAmbiguousTreeVisitor(self.callbacks, self.forest_sum_visitor)
         self.term_matcher = term_matcher
 
 
@@ -316,7 +312,10 @@
             assert False, 'Earley should not generate multiple start symbol 
items!'
 
         # Perform our SPPF -> AST conversion using the right ForestVisitor.
-        return self.forest_tree_visitor.visit(solutions[0])
+        forest_tree_visitor_cls = ForestToTreeVisitor if 
self.resolve_ambiguity else ForestToAmbiguousTreeVisitor
+        forest_tree_visitor = forest_tree_visitor_cls(self.callbacks, 
self.forest_sum_visitor and self.forest_sum_visitor())
+
+        return forest_tree_visitor.visit(solutions[0])
 
 
 class ApplyCallbacks(Transformer_InPlace):

commit python-lark-parser for openSUSE:Factory

Reply via email to