Re: D2948: wireproto: syntax for encoding CBOR into frames

2018-03-29 Thread Gregory Szorc
On Thu, Mar 29, 2018 at 9:36 AM, Josef 'Jeff' Sipek 
wrote:

> On Wed, Mar 28, 2018 at 22:06:17 +, indygreg (Gregory Szorc) wrote:
> > indygreg updated this revision to Diff 7352.
> >
> > REPOSITORY
> >   rHG Mercurial
> >
> > CHANGES SINCE LAST UPDATE
> >   https://phab.mercurial-scm.org/D2948?vs=7327=7352
> >
> > REVISION DETAIL
> >   https://phab.mercurial-scm.org/D2948
> >
> > AFFECTED FILES
> >   mercurial/debugcommands.py
> >   mercurial/utils/stringutil.py
> >   mercurial/wireprotoframing.py
> >   tests/test-wireproto-serverreactor.py
> >
> > CHANGE DETAILS
> >
> ...
> > diff --git a/mercurial/wireprotoframing.py b/mercurial/wireprotoframing.
> py
> > --- a/mercurial/wireprotoframing.py
> > +++ b/mercurial/wireprotoframing.py
> > @@ -16,6 +16,7 @@
> >  from .i18n import _
> >  from .thirdparty import (
> >  attr,
> > +cbor,
> >  )
> >  from . import (
> >  error,
> > @@ -156,6 +157,9 @@
> >  def makeframefromhumanstring(s):
> >  """Create a frame from a human readable string
> >
> > +DANGER: NOT SAFE TO USE WITH UNTRUSTED INPUT BECAUSE OF POTENTIAL
> > +eval() USAGE. DO NOT USE IN CORE.
> > +
> >  Strings have the form:
> >
> >   
> > @@ -169,6 +173,11 @@
> >  named constant.
> >
> >  Flags can be delimited by `|` to bitwise OR them together.
> > +
> > +If the payload begins with ``cbor:``, the following string will be
> > +evaluated as Python code and the resulting object will be fed into
> > +a CBOR encoder. Otherwise, the payload is interpreted as a Python
> > +byte string literal.
> >  """
>
> Um, why?  Why not just *always* wrap byte strings in CBOR?  The overhead
> will be 1-9 bytes depending on the length (short strings will use 1-2 bytes
> of overhead), then there's no need to prefix anything with "cbor:".  Any
> string <4GB in size will incur 5 bytes overhead.  Which is (IMO)
> acceptable.
>

We may end up in that state. There was code emitting non-CBOR data and I
didn't want to scope bloat the commit.


>
> >  fields = s.split(b' ', 5)
> >  requestid, streamid, streamflags, frametype, frameflags, payload =
> fields
> > @@ -196,7 +205,11 @@
> >  else:
> >  finalflags |= int(flag)
> >
> > -payload = stringutil.unescapestr(payload)
> > +if payload.startswith(b'cbor:'):
> > +payload = cbor.dumps(stringutil.evalpython(payload[5:]),
> canonical=True)
> > +
> > +else:
> > +payload = stringutil.unescapestr(payload)
> >
> >  return makeframe(requestid=requestid, streamid=streamid,
> >   streamflags=finalstreamflags, typeid=frametype,
> > diff --git a/mercurial/utils/stringutil.py b/mercurial/utils/stringutil.
> py
> > --- a/mercurial/utils/stringutil.py
> > +++ b/mercurial/utils/stringutil.py
> > @@ -9,6 +9,7 @@
> >
> >  from __future__ import absolute_import
> >
> > +import __future__
> >  import codecs
> >  import re as remod
> >  import textwrap
> > @@ -286,3 +287,29 @@
> >  If s is not a valid boolean, returns None.
> >  """
> >  return _booleans.get(s.lower(), None)
> > +
> > +def evalpython(s):
> > +"""Evaluate a string containing a Python expression.
> > +
> > +THIS FUNCTION IS NOT SAFE TO USE ON UNTRUSTED INPUT. IT'S USE
> SHOULD BE
> > +LIMITED TO DEVELOPER-FACING FUNCTIONALITY.
> > +"""
> > +globs = {
> > +r'__builtins__': {
> > +r'None': None,
> > +r'False': False,
> > +r'True': True,
> > +r'int': int,
> > +r'set': set,
> > +r'tuple': tuple,
> > +# Don't need to expose dict and list because we can use
> > +# literals.
> > +},
> > +}
> > +
> > +# We can't use eval() directly because it inherits compiler
> > +# flags from this module and we need unicode literals for Python 3
> > +# compatibility.
> > +code = compile(s, r'', r'eval',
> > +   __future__.unicode_literals.compiler_flag, True)
> > +return eval(code, globs, {})
> > diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
> > --- a/mercurial/debugcommands.py
> > +++ b/mercurial/debugcommands.py
> > @@ -2785,7 +2785,10 @@
> >  or a flag name for stream flags or frame flags, respectively.
> Values are
> >  resolved to integers and then bitwise OR'd together.
> >
> > -``payload`` is is evaluated as a Python byte string literal.
> > +``payload`` represents the raw frame payload. If it begins with
> > +``cbor:``, the following string is evaluated as Python code and the
> > +resulting object is fed into a CBOR encoder. Otherwise it is
> interpreted
> > +as a Python byte string literal.
> >  """
> >  opts = pycompat.byteskwargs(opts)
> >
> >
> >
> >
> > To: indygreg, #hg-reviewers
> > Cc: mercurial-devel
> > ___
> > Mercurial-devel mailing list
> > Mercurial-devel@mercurial-scm.org
> > 

Re: D2948: wireproto: syntax for encoding CBOR into frames

2018-03-29 Thread Josef 'Jeff' Sipek
On Wed, Mar 28, 2018 at 22:06:17 +, indygreg (Gregory Szorc) wrote:
> indygreg updated this revision to Diff 7352.
> 
> REPOSITORY
>   rHG Mercurial
> 
> CHANGES SINCE LAST UPDATE
>   https://phab.mercurial-scm.org/D2948?vs=7327=7352
> 
> REVISION DETAIL
>   https://phab.mercurial-scm.org/D2948
> 
> AFFECTED FILES
>   mercurial/debugcommands.py
>   mercurial/utils/stringutil.py
>   mercurial/wireprotoframing.py
>   tests/test-wireproto-serverreactor.py
> 
> CHANGE DETAILS
> 
...
> diff --git a/mercurial/wireprotoframing.py b/mercurial/wireprotoframing.py
> --- a/mercurial/wireprotoframing.py
> +++ b/mercurial/wireprotoframing.py
> @@ -16,6 +16,7 @@
>  from .i18n import _
>  from .thirdparty import (
>  attr,
> +cbor,
>  )
>  from . import (
>  error,
> @@ -156,6 +157,9 @@
>  def makeframefromhumanstring(s):
>  """Create a frame from a human readable string
>  
> +DANGER: NOT SAFE TO USE WITH UNTRUSTED INPUT BECAUSE OF POTENTIAL
> +eval() USAGE. DO NOT USE IN CORE.
> +
>  Strings have the form:
>  
>   
> @@ -169,6 +173,11 @@
>  named constant.
>  
>  Flags can be delimited by `|` to bitwise OR them together.
> +
> +If the payload begins with ``cbor:``, the following string will be
> +evaluated as Python code and the resulting object will be fed into
> +a CBOR encoder. Otherwise, the payload is interpreted as a Python
> +byte string literal.
>  """

Um, why?  Why not just *always* wrap byte strings in CBOR?  The overhead
will be 1-9 bytes depending on the length (short strings will use 1-2 bytes
of overhead), then there's no need to prefix anything with "cbor:".  Any
string <4GB in size will incur 5 bytes overhead.  Which is (IMO) acceptable.

Jeff.

>  fields = s.split(b' ', 5)
>  requestid, streamid, streamflags, frametype, frameflags, payload = fields
> @@ -196,7 +205,11 @@
>  else:
>  finalflags |= int(flag)
>  
> -payload = stringutil.unescapestr(payload)
> +if payload.startswith(b'cbor:'):
> +payload = cbor.dumps(stringutil.evalpython(payload[5:]), 
> canonical=True)
> +
> +else:
> +payload = stringutil.unescapestr(payload)
>  
>  return makeframe(requestid=requestid, streamid=streamid,
>   streamflags=finalstreamflags, typeid=frametype,
> diff --git a/mercurial/utils/stringutil.py b/mercurial/utils/stringutil.py
> --- a/mercurial/utils/stringutil.py
> +++ b/mercurial/utils/stringutil.py
> @@ -9,6 +9,7 @@
>  
>  from __future__ import absolute_import
>  
> +import __future__
>  import codecs
>  import re as remod
>  import textwrap
> @@ -286,3 +287,29 @@
>  If s is not a valid boolean, returns None.
>  """
>  return _booleans.get(s.lower(), None)
> +
> +def evalpython(s):
> +"""Evaluate a string containing a Python expression.
> +
> +THIS FUNCTION IS NOT SAFE TO USE ON UNTRUSTED INPUT. IT'S USE SHOULD BE
> +LIMITED TO DEVELOPER-FACING FUNCTIONALITY.
> +"""
> +globs = {
> +r'__builtins__': {
> +r'None': None,
> +r'False': False,
> +r'True': True,
> +r'int': int,
> +r'set': set,
> +r'tuple': tuple,
> +# Don't need to expose dict and list because we can use
> +# literals.
> +},
> +}
> +
> +# We can't use eval() directly because it inherits compiler
> +# flags from this module and we need unicode literals for Python 3
> +# compatibility.
> +code = compile(s, r'', r'eval',
> +   __future__.unicode_literals.compiler_flag, True)
> +return eval(code, globs, {})
> diff --git a/mercurial/debugcommands.py b/mercurial/debugcommands.py
> --- a/mercurial/debugcommands.py
> +++ b/mercurial/debugcommands.py
> @@ -2785,7 +2785,10 @@
>  or a flag name for stream flags or frame flags, respectively. Values are
>  resolved to integers and then bitwise OR'd together.
>  
> -``payload`` is is evaluated as a Python byte string literal.
> +``payload`` represents the raw frame payload. If it begins with
> +``cbor:``, the following string is evaluated as Python code and the
> +resulting object is fed into a CBOR encoder. Otherwise it is interpreted
> +as a Python byte string literal.
>  """
>  opts = pycompat.byteskwargs(opts)
>  
> 
> 
> 
> To: indygreg, #hg-reviewers
> Cc: mercurial-devel
> ___
> Mercurial-devel mailing list
> Mercurial-devel@mercurial-scm.org
> https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel

-- 
Si hoc legere scis nimium eruditionis habes.
___
Mercurial-devel mailing list
Mercurial-devel@mercurial-scm.org
https://www.mercurial-scm.org/mailman/listinfo/mercurial-devel