On Tue, Feb 04, 2014 at 05:00:25PM +0100, Santi Raffa wrote:
> For inbound data the simplest, safest thing to do is to traverse all
> JSON right after encoding and search for private parameters by key.
> 
> This ensures that all consumers of this data get Private values
> transparently and consistently; the serializing methods don't have to
> worry about trying to understand what kind of JSON it is receiving.
> It also minimizes the surface area of codebase that is exposed to the
> values directly.
> 
> The downside is it adds a ~5% time overhead to all JSON decoding as
> measured through:
> 
> $ time make hs-test-py_compat_types
> 
> As far as encoding to JSON is concerned, the serializer will redact
> Private values unless told to do so explicitly (e.g., for tests).
> 
> Signed-off-by: Santi Raffa <[email protected]>
> ---
>  lib/rpc/client.py                     |   3 +-
>  lib/serializer.py                     |  93 +++++++++++++++++++++------
>  src/Ganeti/Constants.hs               |   7 ++
>  test/hs/Test/Ganeti/OpCodes.hs        |   5 +-
>  test/py/cmdlib/node_unittest.py       |   1 -
>  test/py/ganeti.serializer_unittest.py | 118 
> ++++++++++++++++++++++++++++------
>  6 files changed, 188 insertions(+), 39 deletions(-)
> 
> diff --git a/lib/rpc/client.py b/lib/rpc/client.py
> index 83b7df9..47ce583 100644
> --- a/lib/rpc/client.py
> +++ b/lib/rpc/client.py
> @@ -120,7 +120,8 @@ def FormatRequest(method, args, version=None):
>      request[KEY_VERSION] = version
>  
>    # Serialize the request
> -  return serializer.DumpJson(request)
> +  return serializer.DumpJson(request,
> +                             
> private_decoder=serializer.EncodeWithPrivateFields)
>  

Almost ran out of characters with this one :)

Shouldn't it be private_encoder? After all, it's 'EncodeWithPrivateFields'.

>  def CallRPCMethod(transport_cb, method, args, version=None):
> diff --git a/lib/serializer.py b/lib/serializer.py
> index 7bd4c1f..5a112b7 100644
> --- a/lib/serializer.py
> +++ b/lib/serializer.py
> @@ -40,19 +40,22 @@ import simplejson
>  
>  from ganeti import errors
>  from ganeti import utils
> -
> +from ganeti import constants
>  
>  _RE_EOLSP = re.compile("[ \t]+$", re.MULTILINE)
>  
>  
> -def DumpJson(data):
> +def DumpJson(data, private_decoder=None):
>    """Serialize a given object.
>  
>    @param data: the data to serialize
>    @return: the string representation of data

Same comment as above. And docstring is missing.

>  
>    """
> -  encoded = simplejson.dumps(data)
> +  if private_decoder is None:
> +    # Do not leak private fields by default.
> +    private_decoder = EncodeWithoutPrivate
> +  encoded = simplejson.dumps(data, default=private_decoder)
>  
>    txt = _RE_EOLSP.sub("", encoded)
>    if not txt.endswith("\n"):
> @@ -69,10 +72,46 @@ def LoadJson(txt):
>    @return: the original data
>  
>    """
> -  return simplejson.loads(txt)
> +  values = simplejson.loads(txt)
> +
> +  # Hunt and seek for Private fields and wrap them.
> +  WrapPrivateValues(values)
> +
> +  return values
> +
> +
> +def WrapPrivateValues(json):

docstring

> +  todo = [json]
> +
> +  while todo:
> +    data = todo.pop()
> +
> +    if isinstance(data, list): # Array
> +      for item in data:
> +        todo.append(item)
> +    elif isinstance(data, dict): # Object
> +
> +      # This is kind of a kludge, but the only place where we know what 
> should
> +      # be protected is in opcodes.py, and not in a way that is helpful to 
> us,
> +      # especially in such a high traffic method; on the other hand, the
> +      # Haskell `py_compat_fields` test should complain whenever this check
> +      # does not protect fields properly.
> +      for field in data:
> +        value = data[field]
> +        if field in constants.PRIVATE_PARAMETERS_BLACKLIST:
> +          if not field.endswith("_cluster"):

This will break as soon as someone changes the parameters list.
This should be documented in the constant, I guess.

> +            data[field] = PrivateDict(value)
> +          else:
> +            for os in data[field]:
> +              value[os] = PrivateDict(value[os])
> +        else:
> +          todo.append(value)
> +    else: # Values
> +      pass

This is a strange algorithm.  Have you considered doing a recursive
traversal of the object tree?

> -def DumpSignedJson(data, key, salt=None, key_selector=None):
> +def DumpSignedJson(data, key, salt=None, key_selector=None,
> +                   private_decoder=None):
>    """Serialize a given object and authenticate it.

docstring is incomplete

>    @param data: the data to serialize
> @@ -82,7 +121,7 @@ def DumpSignedJson(data, key, salt=None, 
> key_selector=None):
>    @return: the string representation of data signed by the hmac key
>  
>    """
> -  txt = DumpJson(data)
> +  txt = DumpJson(data, private_decoder=private_decoder)
>    if salt is None:
>      salt = ""
>    signed_dict = {
> @@ -113,6 +152,9 @@ def LoadSignedJson(txt, key):
>  
>    """
>    signed_dict = LoadJson(txt)
> +
> +  WrapPrivateValues(signed_dict)
> +

Can't the line below fail if the WrapPrivateValues above returns an
instance of PrivateDict?

>    if not isinstance(signed_dict, dict):
>      raise errors.SignatureError("Invalid external message")
>    try:
> @@ -226,18 +268,19 @@ class Private(object):
>  class PrivateDict(dict):
>    """A dictionary that turns its values to private fields.
>  
> -  >>> PrivateDict()
> -  {}
> -  >>> supersekkrit = PrivateDict({"password": "foobar"})
> -  >>> supersekkrit["password"]
> -  Private(?, descr='password')
> -  >>> supersekkrit["password"].Get()
> -  'foobar'
> -  >>> supersekkrit["user"] = "eggspam"
> -  >>> supersekkrit.GetPrivate("user")
> -  'eggspam'
> -  >>> supersekkrit.Unprivate()
> -  {'password': 'foobar', 'user': 'eggspam'}
> +    >>> PrivateDict()
> +    {}
> +    >>> supersekkrit = PrivateDict({"password": "foobar"})
> +    >>> supersekkrit["password"]
> +    Private(?, descr='password')
> +    >>> supersekkrit["password"].Get()
> +    'foobar'
> +    >>> supersekkrit["user"] = "eggspam"
> +    >>> supersekkrit.GetPrivate("user")
> +    'eggspam'
> +    >>> supersekkrit.Unprivate()
> +    {'password': 'foobar', 'user': 'eggspam'}
> +

It seems you're just fixing the indentation.
I guess this should go in the patch where you first introduced the class.

>    """
>  

Unnecessary newline here

>    def __init__(self, data=None):
> @@ -279,6 +322,7 @@ class PrivateDict(dict):
>        'bar'
>        >>> PrivateDict({"foo": "bar"}).GetPrivate("baz", "spam")
>        'spam'
> +
>      """ # epydoc does not check doctests, btw.
>      if len(args) == 1:
>        key, = args
> @@ -299,8 +343,21 @@ class PrivateDict(dict):
>      {'foo': 'bar'}
>  
>      @rtype: dict
> +
>      """
>      returndict = {}
>      for key in self:
>        returndict[key] = self[key].Get()
>      return returndict
> +
> +
> +def EncodeWithoutPrivate(obj):
> +  if isinstance(obj, Private):
> +    return None
> +  raise TypeError(repr(obj) + " is not JSON serializable")

The message should mention the Private class. Otherwise a programmer
sees this error message and he will think that maybe the object he's
trying to serialize is missing some method needed by the serialization
or is not deriving from ConfigObject, or something else.

> +
> +
> +def EncodeWithPrivateFields(obj):
> +  if isinstance(obj, Private):
> +    return obj.Get()
> +  raise TypeError(repr(obj) + " is not JSON serializable")

There is
  EncodeWithoutPrivate
  EncodeWithPrivateFields

Please make it consistent.

> diff --git a/src/Ganeti/Constants.hs b/src/Ganeti/Constants.hs
> index 2975afc..642ae5b 100644
> --- a/src/Ganeti/Constants.hs
> +++ b/src/Ganeti/Constants.hs
> @@ -4764,3 +4764,10 @@ visNoLogNoSave = VisNoLogNoSave
>  -- | All visibility levels
>  visValidLevels :: FrozenSet ParamVisibility
>  visValidLevels = ConstantUtils.mkSet [minBound..]
> +
> +-- | Parameters that should be protected
> +privateParametersBlacklist :: [String]
> +privateParametersBlacklist = [ "osparams_private"
> +                             , "osparams_secret"
> +                             , "osparams_private_cluster" ]

List format is
  [ x1
  , x2
  ]

Thanks,
Jose

> +
> diff --git a/test/hs/Test/Ganeti/OpCodes.hs b/test/hs/Test/Ganeti/OpCodes.hs
> index 0719ae1..1411142 100644
> --- a/test/hs/Test/Ganeti/OpCodes.hs
> +++ b/test/hs/Test/Ganeti/OpCodes.hs
> @@ -516,7 +516,10 @@ case_py_compat_types = do
>                 \  op.Validate(True)\n\
>                 \encoded = [(op.Summary(), op.__getstate__())\n\
>                 \           for op in decoded]\n\
> -               \print serializer.Dump(encoded)" serialized
> +               \print serializer.Dump(\
> +               \  encoded,\
> +               \  private_decoder = serializer.EncodeWithPrivateFields)"
> +               serialized
>       >>= checkPythonResult
>    let deserialised =
>          J.decode py_stdout::J.Result [(String, OpCodes.MetaOpCode)]
> diff --git a/test/py/cmdlib/node_unittest.py b/test/py/cmdlib/node_unittest.py
> index d0ab416..8c5581f 100644
> --- a/test/py/cmdlib/node_unittest.py
> +++ b/test/py/cmdlib/node_unittest.py
> @@ -229,7 +229,6 @@ class TestLUNodeAdd(CmdlibTestCase):
>      self.ExecOpCodeExpectOpPrereqError(op, "Readded node doesn't have the 
> same"
>                                         " IP address configuration as before")
>  
> -
>    def testNodeHasSecondaryIpButNotMaster(self):
>      self.master.secondary_ip = self.master.primary_ip
>  
> diff --git a/test/py/ganeti.serializer_unittest.py 
> b/test/py/ganeti.serializer_unittest.py
> index 45a3932..4a7874c 100755
> --- a/test/py/ganeti.serializer_unittest.py
> +++ b/test/py/ganeti.serializer_unittest.py
> @@ -27,6 +27,7 @@ import unittest
>  from ganeti import serializer
>  from ganeti import errors
>  from ganeti import ht
> +from ganeti import objects
>  
>  import testutils
>  
> @@ -39,23 +40,28 @@ class TestSerializer(testutils.GanetiTestCase):
>      255,
>      [1, 2, 3],
>      (1, 2, 3),
> -    { "1": 2, "foo": "bar", },
> +    {"1": 2,
> +     "foo": "bar"},
>      ["abc", 1, 2, 3, 999,
>        {
>          "a1": ("Hello", "World"),
>          "a2": "This is only a test",
>          "a3": None,
> -        },
> -      {
> -        "foo": "bar",
> -        },
> -      ]
> +        "osparams:": serializer.PrivateDict({
> +                      "foo": 5,
> +                     })
> +      }
>      ]
> +  ]
>  
>    def _TestSerializer(self, dump_fn, load_fn):
> +    _dump_fn = lambda data: dump_fn(
> +      data,
> +      private_decoder=serializer.EncodeWithPrivateFields
> +    )
>      for data in self._TESTDATA:
> -      self.failUnless(dump_fn(data).endswith("\n"))
> -      self.assertEqualValues(load_fn(dump_fn(data)), data)
> +      self.failUnless(_dump_fn(data).endswith("\n"))
> +      self.assertEqualValues(load_fn(_dump_fn(data)), data)
>  
>    def testGeneric(self):
>      self._TestSerializer(serializer.Dump, serializer.Load)
> @@ -70,30 +76,35 @@ class TestSerializer(testutils.GanetiTestCase):
>      self._TestSigned(serializer.DumpSignedJson, serializer.LoadSignedJson)
>  
>    def _TestSigned(self, dump_fn, load_fn):
> +    _dump_fn = lambda *args, **kwargs: dump_fn(
> +      *args,
> +      private_decoder=serializer.EncodeWithPrivateFields,
> +      **kwargs
> +    )
>      for data in self._TESTDATA:
> -      self.assertEqualValues(load_fn(dump_fn(data, "mykey"), "mykey"),
> +      self.assertEqualValues(load_fn(_dump_fn(data, "mykey"), "mykey"),
>                               (data, ""))
> -      self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey",
> -                                             salt="mysalt"),
> +      self.assertEqualValues(load_fn(_dump_fn(data, "myprivatekey",
> +                                              salt="mysalt"),
>                                       "myprivatekey"),
>                               (data, "mysalt"))
>  
>        keydict = {
>          "mykey_id": "myprivatekey",
>          }
> -      self.assertEqualValues(load_fn(dump_fn(data, "myprivatekey",
> -                                             salt="mysalt",
> -                                             key_selector="mykey_id"),
> +      self.assertEqualValues(load_fn(_dump_fn(data, "myprivatekey",
> +                                              salt="mysalt",
> +                                              key_selector="mykey_id"),
>                                       keydict.get),
>                               (data, "mysalt"))
>        self.assertRaises(errors.SignatureError, load_fn,
> -                        dump_fn(data, "myprivatekey",
> -                                salt="mysalt",
> -                                key_selector="mykey_id"),
> +                        _dump_fn(data, "myprivatekey",
> +                                 salt="mysalt",
> +                                 key_selector="mykey_id"),
>                          {}.get)
>  
>      self.assertRaises(errors.SignatureError, load_fn,
> -                      dump_fn("test", "myprivatekey"),
> +                      _dump_fn("test", "myprivatekey"),
>                        "myotherkey")
>  
>      self.assertRaises(errors.SignatureError, load_fn,
> @@ -131,5 +142,76 @@ class TestLoadAndVerifyJson(unittest.TestCase):
>      self.assertEqual(serializer.LoadAndVerifyJson("\"Foo\"", ht.TAny), "Foo")
>  
>  
> +class TestPrivate(unittest.TestCase):
> +
> +  def testEquality(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    nDict = {"bar": "egg"}
> +    self.assertEqual(pDict, nDict, "PrivateDict-dict equality failure")
> +
> +  def testPrivateDictUnprivate(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    uDict = pDict.Unprivate()
> +    nDict = {"bar": "egg"}
> +    self.assertEquals(type(uDict), dict,
> +                      "PrivateDict.Unprivate() did not return a dict")
> +    self.assertEqual(pDict, uDict, "PrivateDict.Unprivate() equality 
> failure")
> +    self.assertEqual(nDict, uDict, "PrivateDict.Unprivate() failed to 
> return")
> +
> +  def testAttributeTransparency(self):
> +    class Dummy(object):
> +      pass
> +
> +    dummy = Dummy()
> +    dummy.bar = "egg"
> +    pDummy = serializer.Private(dummy)
> +    self.assertEqual(pDummy.bar, "egg", "Failed to access attribute of 
> Private")
> +
> +  def testCallTransparency(self):
> +    foo = serializer.Private("egg")
> +    self.assertEqual(foo.upper(), "EGG", "Failed to call Private instance")
> +
> +  def testFillDict(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    self.assertEqual(pDict, objects.FillDict({}, pDict))
> +
> +  def testLeak(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +    self.assertNotIn("egg", str(pDict), "Value leaked in str(PrivateDict)")
> +    self.assertNotIn("egg", repr(pDict), "Value leaked in repr(PrivateDict)")
> +    self.assertNotIn("egg", "{0}".format(pDict),
> +                     "Value leaked in PrivateDict.__format__")
> +    self.assertNotIn("egg", serializer.Dump(pDict),
> +                     "Value leaked in serializer.Dump(PrivateDict)")
> +
> +  def testProperAccess(self):
> +    pDict = serializer.PrivateDict()
> +    pDict["bar"] = "egg"
> +
> +    self.assertIs("egg", pDict["bar"].Get(),
> +                  "Value not returned by Private.Get()")
> +    self.assertIs("egg", pDict.GetPrivate("bar"),
> +                  "Value not returned by Private.GetPrivate()")
> +    self.assertIs("egg", pDict.Unprivate()["bar"],
> +                  "Value not returned by PrivateDict.Unprivate()")
> +    self.assertIn(
> +      "egg",
> +      serializer.Dump(pDict,
> +                      private_decoder=serializer.EncodeWithPrivateFields)
> +    )
> +
> +  def testDictGet(self):
> +    self.assertIs("tar", serializer.PrivateDict().GetPrivate("bar", "tar"),
> +                  "Private.GetPrivate() did not handle the default case.")
> +
> +  def testZeronessPrivate(self):
> +    self.assertTrue(serializer.Private("foo"),
> +                    "Private of non-empty string is false")
> +    self.assertFalse(serializer.Private(""), "Private empty string is true")
> +
>  if __name__ == "__main__":
>    testutils.GanetiTestProgram()
> -- 
> 1.9.0.rc1.175.g0b1dcb5
> 

-- 
Jose Antonio Lopes
Ganeti Engineering
Google Germany GmbH
Dienerstr. 12, 80331, München

Registergericht und -nummer: Hamburg, HRB 86891
Sitz der Gesellschaft: Hamburg
Geschäftsführer: Graham Law, Christine Elizabeth Flores
Steuernummer: 48/725/00206
Umsatzsteueridentifikationsnummer: DE813741370

Reply via email to