Re: [sqlalchemy] Query and compiled_cache
On Thu, Jun 6, 2013 at 2:20 PM, Michael Bayer mike...@zzzcomputing.com wrote: diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index c2ec72c..b458975 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -40,12 +40,15 @@ from ..sql import ( __all__ = ['Query', 'QueryContext', 'aliased'] -def _generative(*assertions): +def _generative(*assertions, maintain_baked=False): Mark a method as generative. @util.decorator def generate(fn, *args, **kw): self = args[0]._clone() +if not maintain_baked: +del self._baked_cache +del self._baked_context for assertion in assertions: assertion(self, fn.func_name) fn(self, *args[1:], **kw) @@ -1157,7 +1160,7 @@ class Query(object): self._lockmode = mode -@_generative() +@_generative(maintain_baked=True) def params(self, *args, **kwargs): add values for bind parameters which may have been specified in filter(). That doesn't seem to be enough. subqueryload seems to be using the wrong query still, after clearing the baked context, resulting in some very wrong sharing of connections between threads (I'm getting some very fun segmentation faults). I think it's with_parent, it clears the baked context through a filter, but the filter is called on the wrong query (the global query instance I use as template), that has the wrong session attached or something. 
This is my current BakedQuery: class BakedQuery(sqlalchemy.orm.query.Query): _baked_context = None _baked_cache = None def _clone(self): rv = super(BakedQuery, self)._clone() try: del rv._baked_context del rv._baked_cache except AttributeError: pass return rv def params(self, *p, **kw): rv = super(BakedQuery, self).params(*p, **kw) rv._baked_context = self._baked_context rv._baked_cache = self._baked_cache return rv def with_session(self, *p, **kw): rv = super(BakedQuery, self).with_session(*p, **kw) rv._baked_context = self._baked_context rv._baked_cache = self._baked_cache return rv @sqlalchemy.orm.query._generative() def bake_as(self, name, cache): Freeze the statement used by this Query. if name not in cache: cache[name] = context = self._compile_context() del context.session del context.query self._baked_context = cache[name] self._baked_cache = cache def _compile_context(self, **kw): if self._baked_context is not None: QueryContext = sqlalchemy.orm.query.QueryContext context = QueryContext.__new__(QueryContext) context.__dict__.update(self._baked_context.__dict__) context.query = self context.session = self.session # need to fix these names, urg context.attributes = context._attributes = context.attributes.copy() return context else: return super(BakedQuery, self)._compile_context(**kw) def _execute_and_instances(self, querycontext): if self._baked_cache is not None: self = self.execution_options(compiled_cache=self._baked_cache) return super(BakedQuery, self)._execute_and_instances(querycontext) And I invoke it like: def some_function(query = blabla.bake_as(blablah)): return query.with_session(S).params(...).first() This code still breaks if I don't use a baked template: def some_function(query = blabla): return query.with_session(S).bake_as(blablah).params(...).first() -- You received this message because you are subscribed to the Google Groups sqlalchemy group. 
To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Fri, Jun 7, 2013 at 1:56 PM, Claudio Freire klaussfre...@gmail.com wrote: This is my current BakedQuery: class BakedQuery(sqlalchemy.orm.query.Query): F**k gmail again. Why the F+!k doesn't it show me that it'll bork all whitespace before I hit send... I smell a bug report coming... class BakedQuery(sqlalchemy.orm.query.Query): ._baked_context = None ._baked_cache = None . .def _clone(self): .rv = super(BakedQuery, self)._clone() .try: .del rv._baked_context .del rv._baked_cache .except AttributeError: .pass .return rv . .def params(self, *p, **kw): .rv = super(BakedQuery, self).params(*p, **kw) .rv._baked_context = self._baked_context .rv._baked_cache = self._baked_cache .return rv . .def with_session(self, *p, **kw): .rv = super(BakedQuery, self).with_session(*p, **kw) .rv._baked_context = self._baked_context .rv._baked_cache = self._baked_cache .return rv . .@sqlalchemy.orm.query._generative() .def bake_as(self, name, cache): .Freeze the statement used by this Query. . .if name not in cache: .cache[name] = context = self._compile_context() .del context.session .del context.query .self._baked_context = cache[name] .self._baked_cache = cache . .def _compile_context(self, **kw): .if self._baked_context is not None: .QueryContext = sqlalchemy.orm.query.QueryContext .context = QueryContext.__new__(QueryContext) .context.__dict__.update(self._baked_context.__dict__) .context.query = self .context.session = self.session .# need to fix these names, urg .context.attributes = context._attributes = context.attributes.copy() .return context .else: .return super(BakedQuery, self)._compile_context(**kw) . .def _execute_and_instances(self, querycontext): .if self._baked_cache is not None: .self = self.execution_options(compiled_cache=self._baked_cache) .return super(BakedQuery, self)._execute_and_instances(querycontext) -- You received this message because you are subscribed to the Google Groups sqlalchemy group. 
To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Tue, Jun 4, 2013 at 5:26 PM, Claudio Freire klaussfre...@gmail.com wrote: On Sun, Jun 2, 2013 at 9:41 PM, Claudio Freire klaussfre...@gmail.com wrote: So the whole thing is rolled up into the named thing I referred to also, so that there's no need to keep a Query object hanging around, when we say bake() we're really just referring to a position in the code somewhere, so I've updated the wiki recipe to use a named system like this: q = s.query(Foo).\ filter(Foo.data == bindparam('foo')).\ bake_as(foo, cache) result = q.params(foo='data 12').all() A highly cleaned up version of your test is attached. I'm still not sure I'm getting everything accounted for here! thanks for testing ! The feature is actually looking quite simple and probably works better as something built in, or at least if we added some methods to QueryContext to ease the burden of caching/copying it. Well, if that works, it certainly covers my needs so there would be no pressing need to incorporate it into the core. I'll let you know tomorrow. I've done only superficial testing for now, I have to build me a test database, but I thought I'd let you know, it seems to work flawless till now. Seems to break unpredictably with subqueryloads. I'll try to get a test for it. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Jun 6, 2013, at 1:03 PM, Claudio Freire klaussfre...@gmail.com wrote: On Tue, Jun 4, 2013 at 5:26 PM, Claudio Freire klaussfre...@gmail.com wrote: On Sun, Jun 2, 2013 at 9:41 PM, Claudio Freire klaussfre...@gmail.com wrote: So the whole thing is rolled up into the named thing I referred to also, so that there's no need to keep a Query object hanging around, when we say bake() we're really just referring to a position in the code somewhere, so I've updated the wiki recipe to use a named system like this: q = s.query(Foo).\ filter(Foo.data == bindparam('foo')).\ bake_as(foo, cache) result = q.params(foo='data 12').all() A highly cleaned up version of your test is attached. I'm still not sure I'm getting everything accounted for here! thanks for testing ! The feature is actually looking quite simple and probably works better as something built in, or at least if we added some methods to QueryContext to ease the burden of caching/copying it. Well, if that works, it certainly covers my needs so there would be no pressing need to incorporate it into the core. I'll let you know tomorrow. I've done only superficial testing for now, I have to build me a test database, but I thought I'd let you know, it seems to work flawless till now. Seems to break unpredictably with subqueryloads. I'll try to get a test for it. I think I know why that might be, because subqueryload is using that same query to build a new one.the new one still has the wrong _baked_context stuck on it. So one way to fix would be to hack into the recipe to blow away the _baked_context when subqueryload starts working with the query. Really, _baked_context should be automatically blown away for all generative methods except for params() and maybe some others. We should add some logic to _clone() and _generative() to control this (see below). But that starts defeating the purpose of the baking. 
So really, this starts looking like the actual feature - the Subqueryloader would want to detect this baked condition, and then bake the query that it makes as well, using the original baked name appended with the loader path that it's working with. See now that is starting to make this look cool (but we've gone into, this is a built in feature now territory). diff --git a/lib/sqlalchemy/orm/query.py b/lib/sqlalchemy/orm/query.py index c2ec72c..b458975 100644 --- a/lib/sqlalchemy/orm/query.py +++ b/lib/sqlalchemy/orm/query.py @@ -40,12 +40,15 @@ from ..sql import ( __all__ = ['Query', 'QueryContext', 'aliased'] -def _generative(*assertions): +def _generative(*assertions, maintain_baked=False): Mark a method as generative. @util.decorator def generate(fn, *args, **kw): self = args[0]._clone() +if not maintain_baked: +del self._baked_cache +del self._baked_context for assertion in assertions: assertion(self, fn.func_name) fn(self, *args[1:], **kw) @@ -1157,7 +1160,7 @@ class Query(object): self._lockmode = mode -@_generative() +@_generative(maintain_baked=True) def params(self, *args, **kwargs): add values for bind parameters which may have been specified in filter(). -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Sun, Jun 2, 2013 at 9:41 PM, Claudio Freire klaussfre...@gmail.com wrote: So the whole thing is rolled up into the named thing I referred to also, so that there's no need to keep a Query object hanging around, when we say bake() we're really just referring to a position in the code somewhere, so I've updated the wiki recipe to use a named system like this: q = s.query(Foo).\ filter(Foo.data == bindparam('foo')).\ bake_as(foo, cache) result = q.params(foo='data 12').all() A highly cleaned up version of your test is attached. I'm still not sure I'm getting everything accounted for here! thanks for testing ! The feature is actually looking quite simple and probably works better as something built in, or at least if we added some methods to QueryContext to ease the burden of caching/copying it. Well, if that works, it certainly covers my needs so there would be no pressing need to incorporate it into the core. I'll let you know tomorrow. I've done only superficial testing for now, I have to build me a test database, but I thought I'd let you know, it seems to work flawless till now. :^) Thanks -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Sat, Jun 1, 2013 at 10:59 PM, Michael Bayer mike...@zzzcomputing.com wrote: The recipe on the wiki also has the issue that it isn't even caching anything to do with the QueryContext, including all of this information regarding eager joins which is pretty important. Your modifications try to correct for this by storing that context, but then it still creates a brand new context anyway and just transfers not nearly enough of its state over for things to work. Yes, I was trying to keep it minimal to figure out why the error up there, but I did start the way you describe, by storing everything session-independent from the query context. So the whole thing is rolled up into the named thing I referred to also, so that there's no need to keep a Query object hanging around, when we say bake() we're really just referring to a position in the code somewhere, so I've updated the wiki recipe to use a named system like this: q = s.query(Foo).\ filter(Foo.data == bindparam('foo')).\ bake_as(foo, cache) result = q.params(foo='data 12').all() A highly cleaned up version of your test is attached. I'm still not sure I'm getting everything accounted for here! thanks for testing ! The feature is actually looking quite simple and probably works better as something built in, or at least if we added some methods to QueryContext to ease the burden of caching/copying it. Well, if that works, it certainly covers my needs so there would be no pressing need to incorporate it into the core. I'll let you know tomorrow. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
Well, if that works, it certainly covers my needs so there would be no pressing need to incorporate it into the core. I'll let you know tomorrow. It's so straightforward at this point I'm leaning towards some more cleanup and adding to 0.9. I'll probably cache the compiled statements on the QueryContext itself too, and the cache dictionary will be via Session / Sessionmaker. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Mon, Jun 3, 2013 at 1:55 AM, Michael Bayer mike...@zzzcomputing.com wrote: Well, if that works, it certainly covers my needs so there would be no pressing need to incorporate it into the core. I'll let you know tomorrow. It's so straightforward at this point I'm leaning towards some more cleanup and adding to 0.9. I'll probably cache the compiled statements on the QueryContext itself too, and the cache dictionary will be via Session / Sessionmaker. Bear in mind that the use case I'm not only now, but always facing, involves queries in many, different, short-lived sessions. So making it Session-local won't live long enough, unless it can be given to the Sessionmaker itself and have that caching context be shared among all sessions. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
the detached error is because even though the baked query emits the correct SQL with the LEFT OUTER JOIN, there are callables that are present inside the QueryContext that are tailored to look specifically for a particular alias() of the mapped Address table, which is not the same alias() object that's in your cached query - so the joined eager loader for User.address looks in the row for its columns, sees that they're not there (since the ORM targets column rows by Column() object), and doesn't populate the address attribute. So the attribute remains unloaded until you access it where you get your detached error. The use labels error that you got early on was due to the fact that the wiki recipe was for some silly reason using self.statement to get at the statement instead of the context.statement it just generated, not sure what that was about. The recipe on the wiki also has the issue that it isn't even caching anything to do with the QueryContext, including all of this information regarding eager joins which is pretty important. Your modifications try to correct for this by storing that context, but then it still creates a brand new context anyway and just transfers not nearly enough of its state over for things to work. As the comment on the wiki suggested, I'm not seeing any issue if we just cache the whole QueryContext and then just use it again. But there's a few things we have to be careful of, one is that the QueryContext holds onto the Query and Session that it's related to, so we delete those before caching. Then, we make a shallow copy of it when we actually want to return a usable QueryContext and poke on the current Query/Session, and also copy the attributes dictionary just in case some loader wants to mess with things in there too (and for some reason there's a naming inconsistency with that dictionary too it seems I haven't fixed yet). 
Besides the Session being stuck on the QueryContext, there's a numeric counter called a runid that gets stuck onto it at loading time that should only be used once. So the whole thing is rolled up into the named thing I referred to also, so that there's no need to keep a Query object hanging around, when we say bake() we're really just referring to a position in the code somewhere, so I've updated the wiki recipe to use a named system like this: q = s.query(Foo).\ filter(Foo.data == bindparam('foo')).\ bake_as(foo, cache) result = q.params(foo='data 12').all() A highly cleaned up version of your test is attached. I'm still not sure I'm getting everything accounted for here! thanks for testing ! The feature is actually looking quite simple and probably works better as something built in, or at least if we added some methods to QueryContext to ease the burden of caching/copying it. On May 31, 2013, at 4:40 PM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 4:47 PM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 4:44 PM, Michael Bayer mike...@zzzcomputing.com wrote: can you just attach a working .py script How does that work without a database? Ok, I took one of SQLA's tests, and make it break ;) Notice the problem here is that I close the session after querying. Since the baked query has a joinedload, it shouldn't matter, but it does, because when baking, eager loads are broken somehow. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out. test_baked.py -- You received this message because you are subscribed to the Google Groups sqlalchemy group. 
To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out. On May 31, 2013, at 4:40 PM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 4:47 PM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 4:44 PM, Michael Bayer mike...@zzzcomputing.com wrote: can you just attach a working .py script How does that work without a database? Ok, I took one of SQLA's tests, and make it break ;) Notice the problem here is that I close the session after querying. Since the baked query has a joinedload, it shouldn't matter, but it does, because when baking, eager loads are broken somehow. -- You received this message because you are subscribed to the Google Groups
Re: [sqlalchemy] Query and compiled_cache
On Thu, May 30, 2013 at 7:04 PM, Michael Bayer mike...@zzzcomputing.com wrote: The hashing thing really has to start as a core concept first. It's a big job but would be very helpful for caching scenarios and would allow us to build this feature on Query without too much difficulty. The nice thing about unhashable is that simple queries will be hashable, but as soon as complexity increases you'd start seeing unhashables come in, preventing us from caching something that isn't actually easy to cache. AFAIK only py3 has support for making user classes unhashable. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Friday, May 31, 2013 10:18:41 AM UTC-4, Klauss wrote: On Thu, May 30, 2013 at 7:04 PM, Michael Bayer mik...@zzzcomputing.comjavascript: wrote: The hashing thing really has to start as a core concept first. It's a big job but would be very helpful for caching scenarios and would allow us to build this feature on Query without too much difficulty. The nice thing about unhashable is that simple queries will be hashable, but as soon as complexity increases you'd start seeing unhashables come in, preventing us from caching something that isn't actually easy to cache. AFAIK only py3 has support for making user classes unhashable. I'm not considering using `__hash__()` for this, I'd rather keep it as a special method for this purpose. But after sleeping on it, I'm still pretty skeptical, because it's actually pretty difficult to determine what parts of a statement will remain constant across backends. If you have a select like, SELECT x + ? FROM q, where ? is a bound parameter, that statement won't run on some backends which don't allow bound parameters in the columns clause. So a select() object select([x + 3]), we would theoretically have to include the number 3 as part of its cache key...but based on where the 3 is present. Similar things happen when you say select().limit(x) - LIMIT can usually be rendered via bound parameter, but not on backends like Sybase or SQL Server where it is rendered in the TOP clause that can't be bound. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Fri, May 31, 2013 at 11:29 AM, Michael Bayer mike...@zzzcomputing.com wrote: On Friday, May 31, 2013 10:18:41 AM UTC-4, Klauss wrote: On Thu, May 30, 2013 at 7:04 PM, Michael Bayer mik...@zzzcomputing.com wrote: The hashing thing really has to start as a core concept first. It's a big job but would be very helpful for caching scenarios and would allow us to build this feature on Query without too much difficulty. The nice thing about unhashable is that simple queries will be hashable, but as soon as complexity increases you'd start seeing unhashables come in, preventing us from caching something that isn't actually easy to cache. AFAIK only py3 has support for making user classes unhashable. I'm not considering using `__hash__()` for this, I'd rather keep it as a special method for this purpose. But after sleeping on it, I'm still pretty skeptical, because it's actually pretty difficult to determine what parts of a statement will remain constant across backends. It's not necessary to be constant across backends. All of compiled_cache machinery already appends the dialect so only semantically constant would be required. If you have a select like, SELECT x + ? FROM q, where ? is a bound parameter, that statement won't run on some backends which don't allow bound parameters in the columns clause. So a select() object select([x + 3]), we would theoretically have to include the number 3 as part of its cache key...but based on where the 3 is present. Similar things happen when you say select().limit(x) - LIMIT can usually be rendered via bound parameter, but not on backends like Sybase or SQL Server where it is rendered in the TOP clause that can't be bound. So yeah, you don't have to care about that. It's taken care at other levels. Hashing should be concerned with semantics only. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. 
To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On May 31, 2013, at 10:51 AM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 11:29 AM, Michael Bayer mike...@zzzcomputing.com wrote: On Friday, May 31, 2013 10:18:41 AM UTC-4, Klauss wrote: On Thu, May 30, 2013 at 7:04 PM, Michael Bayer mik...@zzzcomputing.com wrote: The hashing thing really has to start as a core concept first. It's a big job but would be very helpful for caching scenarios and would allow us to build this feature on Query without too much difficulty. The nice thing about unhashable is that simple queries will be hashable, but as soon as complexity increases you'd start seeing unhashables come in, preventing us from caching something that isn't actually easy to cache. AFAIK only py3 has support for making user classes unhashable. I'm not considering using `__hash__()` for this, I'd rather keep it as a special method for this purpose. But after sleeping on it, I'm still pretty skeptical, because it's actually pretty difficult to determine what parts of a statement will remain constant across backends. It's not necessary to be constant across backends. All of compiled_cache machinery already appends the dialect so only semantically constant would be required. if I have a query: q = s.query(X).filter_by(foo='bar').limit(3) say we have it generate a hash: X._hash = x_hash X.foo._hash = foo_hash operator.eq._hash = eq_hash bindparam('foo')._hash = bp_foo_hash bindparam('limit')._hash = limit_hash the hash is: hash(x_hash, foo_hash, eq_hash, bp_foo_hash, limit_hash) this hash works for backends that can render LIMIT as a bound parameter. It will *not* work for SQL server which cannot render LIMIT as a bound parameter. If the hash is determined at the level of Query, we *do not* know whether or not the backend supports LIMIT as a bound parameter, unless we ask it. So OK, we make more rules - ask the backend if LIMIT can be hashed as a bound parameter. 
But then what if a particular backend has more restrictive bound parameter rules than Query is aware of? What if we throw the Firebird backend at it, and all the sudden Firebird has some quirk where you can't put a bound parameter inside of a CASE statement inside of the columns clause? How will Query know that suddenly another deeply embedded bound parameter can no longer be considered hashable as a bound parameter, and must be hashed as a literal value ? The only way that can work generically, is if Query *never* bypasses literals. it means the above query would have to be hashed like this: X._hash = x_hash X.foo._hash = foo_hash operator.eq._hash = eq_hash 'bar'.hash = bar_hash 3._hash = 3_hash the hash is: hash(x_hash, foo_hash, eq_hash, bar_hash, 3_hash) which means the only way you can get parameter hashing is if you write your Query like this: q = s.query(X).filter_by(foo=bindparam('foo')).limit(bindparam('limit')).params(foo='bar', limit=3) which means the behavior isn't very transparent at all, and if we are leaning on the user to explicitly define that things are hashable, we might as well stick with bake() or something klunky like that which nobody will ever use. Exposing a confusing and awkward conditional performance API to the userbase is not good enough to warrant changing the internals. People are confused enough by yield_per(). If you have a select like, SELECT x + ? FROM q, where ? is a bound parameter, that statement won't run on some backends which don't allow bound parameters in the columns clause. So a select() object select([x + 3]), we would theoretically have to include the number 3 as part of its cache key...but based on where the 3 is present. Similar things happen when you say select().limit(x) - LIMIT can usually be rendered via bound parameter, but not on backends like Sybase or SQL Server where it is rendered in the TOP clause that can't be bound. So yeah, you don't have to care about that. It's taken care at other levels. 
Hashing should be concerned with semantics only. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Fri, May 31, 2013 at 12:46 PM, Michael Bayer mike...@zzzcomputing.com wrote: this hash works for backends that can render LIMIT as a bound parameter. It will *not* work for SQL server which cannot render LIMIT as a bound parameter. If the hash is determined at the level of Query, we *do not* know whether or not the backend supports LIMIT as a bound parameter, unless we ask it. So OK, we make more rules - ask the backend if LIMIT can be hashed as a bound parameter. But then what if a particular backend has more restrictive bound parameter rules than Query is aware of? What if we throw the Firebird backend at it, and all the sudden Firebird has some quirk where you can't put a bound parameter inside of a CASE statement inside of the columns clause? How will Query know that suddenly another deeply embedded bound parameter can no longer be considered hashable as a bound parameter, and must be hashed as a literal value ? I see. The solution is to create a new object type, QueryKey, that contains both the Query and the Dialect, and builds its hash either by invoking Query.hash(dialect), or by visiting it somehow. The underlying problem is that Query cannot decide the hash by itself. Then it shouldn't try to. It should only support building a hash with respect to a specific dialect. The only thing required of it is that the hash be stable within cacheable queries of that dialect, no need to ponder about hashability across all dialects. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On May 31, 2013, at 11:59 AM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 12:46 PM, Michael Bayer mike...@zzzcomputing.com wrote: this hash works for backends that can render LIMIT as a bound parameter. It will *not* work for SQL server which cannot render LIMIT as a bound parameter. If the hash is determined at the level of Query, we *do not* know whether or not the backend supports LIMIT as a bound parameter, unless we ask it. So OK, we make more rules - ask the backend if LIMIT can be hashed as a bound parameter. But then what if a particular backend has more restrictive bound parameter rules than Query is aware of? What if we throw the Firebird backend at it, and all the sudden Firebird has some quirk where you can't put a bound parameter inside of a CASE statement inside of the columns clause? How will Query know that suddenly another deeply embedded bound parameter can no longer be considered hashable as a bound parameter, and must be hashed as a literal value ? I see. The solution is to create a new object type, QueryKey, that contains both the Query and the Dialect, and builds its hash either by invoking Query.hash(dialect), or by visiting it somehow. The underlying problem is that Query cannot decide the hash by itself. Then it shouldn't try to. It should only support building a hash with respect to a specific dialect. The only thing required of it is that the hash be stable within cacheable queries of that dialect, no need to ponder about hasheability across all dialects. right. which becomes - you have to entirely build out the select() statement *and* and run it through a system that looks very much like the compiler - every time! in order to find edge cases like, we can't use a bound parameter inside of a CASE() inside the columns clause essentially means a process that is very close to the complexity as the full compilation has to proceed. 
All dialects need to be enhanced in order to support this whole new system, or if we piggyback it onto the existing compilation process, then we're not saving anything at all - and we've already lost the savings of skipping Query._compile_context(). OTOH, if we keep this as bake(), forego Query producing hash keys, and just make it so that bake() can accept a string: q = query(Entity).filter_by(foo=bar).limit(5).bake(my_query) then we can just have Query look up the _compile_context() result for my_query, which takes you right to your dialect-compiled select() constructs which expose which parameters are bound, and then you get all the savings. This is just a few lines different to the existing bake() recipe. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Thu, May 30, 2013 at 3:28 PM, Claudio Freire klaussfre...@gmail.com wrote: On Thu, May 30, 2013 at 2:25 PM, Michael Bayer mike...@zzzcomputing.com wrote: On May 30, 2013, at 11:25 AM, Claudio Freire klaussfre...@gmail.com wrote: That way, one could use the second form up there and benefit from query hashing, because session/param binding wouldn't change the hash, and it would be a cache hit. Has it been explored already? Or maybe there's something wrong on how I'm using the compiled_cache thing? Should I start patching? ;-) there is a very wide gap between working with the baked query recipe, for which I'm not aware of all the errors you refer to so more detail would help Um... I don't remember the exact problems, will have to try it again. I just assumed it was targeted at an older SA release and gave up on it rather quickly. So, this is what I'm getting, which is weird: Traceback (most recent call last): File stdin, line 1, in module File /usr/local/lib64/python2.7/site-packages/chorde-0.1-py2.7-linux-x86_64.egg/chorde/decorators.py, line 184, in cached_f rv = f(*p, **kw) File metrics.py, line 291, in action_metrics if action is not None and action.campaign is not None: File /usr/local/lib64/python2.7/site-packages/SQLAlchemy-0.7.10-py2.7-linux-x86_64.egg/sqlalchemy/orm/attributes.py, line 458, in get value = self.callable_(state, passive) File /usr/local/lib64/python2.7/site-packages/SQLAlchemy-0.7.10-py2.7-linux-x86_64.egg/sqlalchemy/orm/strategies.py, line 481, in _load_for_state (mapperutil.state_str(state), self.key) sqlalchemy.orm.exc.DetachedInstanceError: Parent instance Action at 0x36f04d0 is not bound to a Session; lazy load operation of attribute 'campaign' cannot proceed With: class CacheableQuery(sqlalchemy.orm.query.Query): def __init__(self, *p, **kw): self._cached_context = None self._cached_context_labels = None super(CacheableQuery, self).__init__(*p, **kw) @sqlalchemy.orm.query._generative() def bake(self, labels=True): 
self._compile_context(labels) def _compile_context(self, labels=True): if self._cached_context and self._cached_context_labels == labels: context = super(CacheableQuery, self)._compile_context(labels) cached = self._cached_context context.statement = cached.statement else: context = super(CacheableQuery, self)._compile_context(labels) self._cached_context_labels = labels self._cached_context = context return context All the rest in baked query recipe is already supported by SA, so I didn't include it. Like with_session instead of from_session, and execution_options() instead of hardcoding a cache. Again, the usage is q = blabla.bake() ... q.with_session(S).params(..).first() I have no idea why replacing the query messes the entity's session. Any clue? -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Fri, May 31, 2013 at 4:03 PM, Claudio Freire klaussfre...@gmail.comwrote: With: class CacheableQuery(sqlalchemy.orm.query.Query): def __init__(self, *p, **kw): self._cached_context = None self._cached_context_labels = None super(CacheableQuery, self).__init__(*p, **kw) @sqlalchemy.orm.query._generative() def bake(self, labels=True): self._compile_context(labels) def _compile_context(self, labels=True): if self._cached_context and self._cached_context_labels == labels: context = super(CacheableQuery, self)._compile_context(labels) cached = self._cached_context context.statement = cached.statement else: context = super(CacheableQuery, self)._compile_context(labels) self._cached_context_labels = labels self._cached_context = context return context Damn gmail With: . class CacheableQuery(sqlalchemy.orm.query.Query): . def __init__(self, *p, **kw): . self._cached_context = None . self._cached_context_labels = None . super(CacheableQuery, self).__init__(*p, **kw) . @sqlalchemy.orm.query._generative() . def bake(self, labels=True): . self._compile_context(labels) . . def _compile_context(self, labels=True): . if self._cached_context and self._cached_context_labels == labels: . context = super(CacheableQuery, self)._compile_context(labels) . cached = self._cached_context . context.statement = cached.statement . else: . context = super(CacheableQuery, self)._compile_context(labels) . self._cached_context_labels = labels . self._cached_context = context . . return context -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
can you just attach a working .py script On May 31, 2013, at 3:06 PM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 4:03 PM, Claudio Freire klaussfre...@gmail.com wrote: With: class CacheableQuery(sqlalchemy.orm.query.Query): def __init__(self, *p, **kw): self._cached_context = None self._cached_context_labels = None super(CacheableQuery, self).__init__(*p, **kw) @sqlalchemy.orm.query._generative() def bake(self, labels=True): self._compile_context(labels) def _compile_context(self, labels=True): if self._cached_context and self._cached_context_labels == labels: context = super(CacheableQuery, self)._compile_context(labels) cached = self._cached_context context.statement = cached.statement else: context = super(CacheableQuery, self)._compile_context(labels) self._cached_context_labels = labels self._cached_context = context return context Damn gmail With: . class CacheableQuery(sqlalchemy.orm.query.Query): . def __init__(self, *p, **kw): . self._cached_context = None . self._cached_context_labels = None . super(CacheableQuery, self).__init__(*p, **kw) . @sqlalchemy.orm.query._generative() . def bake(self, labels=True): . self._compile_context(labels) . . def _compile_context(self, labels=True): . if self._cached_context and self._cached_context_labels == labels: . context = super(CacheableQuery, self)._compile_context(labels) . cached = self._cached_context . context.statement = cached.statement . else: . context = super(CacheableQuery, self)._compile_context(labels) . self._cached_context_labels = labels . self._cached_context = context . . return context -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. 
For more options, visit https://groups.google.com/groups/opt_out. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Fri, May 31, 2013 at 4:44 PM, Michael Bayer mike...@zzzcomputing.com wrote: can you just attach a working .py script How does that work without a database? -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Fri, May 31, 2013 at 4:47 PM, Claudio Freire klaussfre...@gmail.com wrote: On Fri, May 31, 2013 at 4:44 PM, Michael Bayer mike...@zzzcomputing.com wrote: can you just attach a working .py script How does that work without a database? Ok, I took one of SQLA's tests, and made it break ;) Notice the problem here is that I close the session after querying. Since the baked query has a joinedload, it shouldn't matter, but it does, because when baking, eager loads are broken somehow. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out. test_baked.py Description: Binary data
[sqlalchemy] Query and compiled_cache
I know this has been discussed a great deal already, but I've been noticing this: SomeClass.query() .filter_by(id = blah) .execution_options(compiled_cache = _orm_query_cache) .options( orm.joinedload(blah), orm.joinedload(blah, bleh), orm.joinedload(blah, bleh, blih), orm.joinedload(blah, bleh, blih, bloh), ).first() It adds one entry in _orm_query_cache each time I run it. The cache is a dict-like LRUCache object I intend to have in the running application to avoid recompiling queries all the time. I've also tried it like this: _some_query = SomeClass.query() .filter_by(id = bindparam(bleh)) .execution_options(compiled_cache = _orm_query_cache) .options( orm.joinedload(blah), orm.joinedload(blah, bleh), orm.joinedload(blah, bleh, blih), orm.joinedload(blah, bleh, blih, bloh), ) globally, and in my hot function: _some_query.with_session(session).params(bleh = bleh).first() Both ways behave the same with regard to the cache. It will be in a very hot path, I know compilation CPU usage pales in comparison with the roundtrip, but since this is such a hot path, I expect compiling repeatedly to be troublesome: I expect to have an unholy amount of concurrency (in the order of 10k rps), I don't want a few of these compiling bogging even minutely other concurrent transactions, the application has very stringent latency requirements, and even a small hiccup would be unacceptable. I'm on latest 0.7, also tried 0.8. I tried the baked query recipe, and it raises exceptions everywhere. I tried to patch it up, and it didn't work (it would cache results as well which was really unintended). But never mind baked query's brokenness, from what I've seen, and read in other threads in the archive, the problem is queries are appended to the cache by object identity. I'm thinking it wouldn't be hard making the Query object hasheable, or at least adding a bake() method that freezes it for hashing (prohibits hash-changing alterations?). 
That way, one could use the second form up there and benefit from query hashing, because session/param binding wouldn't change the hash, and it would be a cache hit. Has it been explored already? Or maybe there's something wrong on how I'm using the compiled_cache thing? Should I start patching? ;-) -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On May 30, 2013, at 11:25 AM, Claudio Freire klaussfre...@gmail.com wrote: That way, one could use the second form up there and benefit from query hashing, because session/param binding wouldn't change the hash, and it would be a cache hit. Has it been explored already? Or maybe there's something wrong on how I'm using the compiled_cache thing? Should I start patching? ;-) there is a very wide gap between working with the baked query recipe, for which I'm not aware of all the errors you refer to so more detail would help, and that of patching SQLAlchemy with new features. In particular I can't imagine how it would cache results, there is nothing like that in the recipe or compiled cache system. If you want to work on a feature that is actually going to change SQLAlchemy, (and would that be before or after you finish #2720? :) ), it would be: 1. there is no bake() method, the entire thing is transparent 2. as one calls session.query(X).filter(Y).join(Z), the hashing scheme is working the entire time in the background against a single LRU cache, that is perhaps per sessionmaker(), or otherwise, to produce cached versions of the Query. Everyone would get the performance gains for free in all cases. A flag or other system would exist to turn the feature off for those who are heavily subclassing Query and having issues or other edge backwards compatible issues. 3. it would just be on by default in a new major release like 0.9 or 1.0 4. it would have a super crapload of very complete and clean unit tests. Otherwise, the bake() recipe as it is can be enhanced or augmented with __hash__() methods and all that but I'm not aware of anything regarding it that would require changes to SQLAlchemy itself, since it uses a Query subclass. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. 
To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Thu, May 30, 2013 at 2:25 PM, Michael Bayer mike...@zzzcomputing.com wrote: On May 30, 2013, at 11:25 AM, Claudio Freire klaussfre...@gmail.com wrote: That way, one could use the second form up there and benefit from query hashing, because session/param binding wouldn't change the hash, and it would be a cache hit. Has it been explored already? Or maybe there's something wrong on how I'm using the compiled_cache thing? Should I start patching? ;-) there is a very wide gap between working with the baked query recipe, for which I'm not aware of all the errors you refer to so more detail would help Um... I don't remember the exact problems, will have to try it again. I just assumed it was targeted at an older SA release and gave up on it rather quickly. and that of patching SQLAlchemy with new features. In particular I can't imagine how it would cache results, there is nothing like that in the recipe or compiled cache system. Something with sharing the context among query instances, since query instances cache results (or that I heard). Not sure about the specifics of how that happened, but I did notice, after patching up the recipe to not throw exceptions, that it would always return the results for the first call. The queries themselves had no bind parameters or anything, they would just return different results each time due to concurrent updates to the database. Maybe that wasn't expected for that recipe? In any case, I just gave up on it without looking into it much. If you want to work on a feature that is actually going to change SQLAlchemy, (and would that be before or after you finish #2720? :) ), it would be: After, I didn't forget, just real life real work priorities made me veer away from it. Since it was for 0.9, I judged I could safely delay 2720 a bit while I take care of work related priorities ;-) 1. there is no bake() method, the entire thing is transparent 2. 
as one calls session.query(X).filter(Y).join(Z), the hashing scheme is working the entire time in the background against a single LRU cache, that is perhaps per sessionmaker(), or otherwise, to produce cached versions of the Query. Everyone would get the performance gains for free in all cases. A flag or other system would exist to turn the feature off for those who are heavily subclassing Query and having issues or other edge backwards compatible issues. That would mean Query objects would by default take the compiled_cache from the session, and not only during flushes but always. If that's alright, sure. 3. it would just be on by default in a new major release like 0.9 or 1.0 I had assumed that much. 4. it would have a super crapload of very complete and clean unit tests. Ehm... I would imagine all the current tests involving Query would cover most of it. A few more cache-specific tests could be added surely, but only to check caching is indeed happening, correctness should be checked by existing tests already. Otherwise, the bake() recipe as it is can be enhanced or augmented with __hash__() methods and all that but I'm not aware of anything regarding it that would require changes to SQLAlchemy itself, since it uses a Query subclass. Well, yeah, I guess so. But that subclass would have to step on all of Query methods to be able to compute, cache and update the hash (computing it always would be almost as costly as compiling, so it has to be cached in an instance attribute). That'd be a chore, and it would break every other release. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On May 30, 2013, at 2:28 PM, Claudio Freire klaussfre...@gmail.com wrote: 4. it would have a super crapload of very complete and clean unit tests. Ehm... I would imagine all the current tests involving Query would cover most of it. A few more cache-specific tests could be added surely, but only to check caching is indeed happening, correctness should be checked by existing tests already. well the current tests suffer very much from years of being integration tests and not unit tests. These tests would actually do a terrible job of testing this cache, as almost all tests use a brand new mapping, a brand new session, and emit just one query. none of the things that can go wrong with caching, such as the multiple result issue you're describing with the bake() recipe, would be exercised by current tests.Memory leaks, subtle changes in queries, all that stuff.It also sort of depends on how the feature comes out, how hard it will be to verify its correctness. Otherwise, the bake() recipe as it is can be enhanced or augmented with __hash__() methods and all that but I'm not aware of anything regarding it that would require changes to SQLAlchemy itself, since it uses a Query subclass. Well, yeah, I guess so. But that subclass would have to step on all of Query methods to be able to compute, cache and update the hash (computing it always would be almost as costly as compiling, so it has to be cached in an instance attribute). That'd be a chore, and it would break every other release. Can't a Query generate its hash from its current state, without generative methods being called ? Otherwise, the generative methods do run through a common system, which is the @_generative decorator. I might guess that the way Query stores its state could be changed to make this whole job easier.That is, if it stored state using a whole system of objects with a known combined hashing scheme, rather than ad-hoc things like _whereclause, _order_by , etc. 
I'm not sure how to do this but it would certainly be some kind of system that is largely transparent even to the internals - the internals might need to change how they represent state changes, but there wouldn't be any explicit mention of hashes inside of say filter_by(), for example. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
my first 25 seconds of looking at this reveals that if you want to be able to generate a hash, this has to go all the way down to everything. query.filter(X == Y) means you need a hash for X == Y too.These hashes are definitely going to be determined using a traversal scheme for sure: q = X == Y q._magic_hash_value_() will ask X, operator.eq, Y, for their hash values (X and Y assuming they are Column objects are considered to be immutable, even though they can be copies of X and Y sometimes with different semantics), and combine them together. So some_select_statement._magic_hash_value_() would traverse all the way down as well. This is why object identity was a lot easier to work with. On May 30, 2013, at 3:05 PM, Michael Bayer mike...@zzzcomputing.com wrote: On May 30, 2013, at 2:28 PM, Claudio Freire klaussfre...@gmail.com wrote: On Thu, May 30, 2013 at 2:25 PM, Michael Bayer mike...@zzzcomputing.com wrote: If you want to work on a feature that is actually going to change SQLAlchemy, (and would that be before or after you finish #2720? :) ), it would be: After, I didn't forget, just real life real work priorities made me veer away from it. Since it was for 0.9, I judged I could safely delay 2720 a bit while I take care of work related priorities ;-) also, I find an overhaul to Query such that it's self-hashing a lot more interesting than #2720. It would be a much bigger performance savings and it would apply to other interpreters like pypy too.Replacements of tiny sections of code with C, not that interesting :) (redoing all the C in pyrex is more interesting but not necessarily a priority). -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. 
For more options, visit https://groups.google.com/groups/opt_out. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Thu, May 30, 2013 at 3:45 PM, Michael Bayer mike...@zzzcomputing.com wrote: On May 30, 2013, at 2:28 PM, Claudio Freire klaussfre...@gmail.com wrote: 4. it would have a super crapload of very complete and clean unit tests. Ehm... I would imagine all the current tests involving Query would cover most of it. A few more cache-specific tests could be added surely, but only to check caching is indeed happening, correctness should be checked by existing tests already. well the current tests suffer very much from years of being integration tests and not unit tests. These tests would actually do a terrible job of testing this cache, as almost all tests use a brand new mapping, a brand new session, and emit just one query. none of the things that can go wrong with caching, such as the multiple result issue you're describing with the bake() recipe, would be exercised by current tests.Memory leaks, subtle changes in queries, all that stuff.It also sort of depends on how the feature comes out, how hard it will be to verify its correctness. Um... that might make the task a lot bigger than it should be. I'll have to look into it. Otherwise, the bake() recipe as it is can be enhanced or augmented with __hash__() methods and all that but I'm not aware of anything regarding it that would require changes to SQLAlchemy itself, since it uses a Query subclass. Well, yeah, I guess so. But that subclass would have to step on all of Query methods to be able to compute, cache and update the hash (computing it always would be almost as costly as compiling, so it has to be cached in an instance attribute). That'd be a chore, and it would break every other release. Can't a Query generate its hash from its current state, without generative methods being called ? Otherwise, the generative methods do run through a common system, which is the @_generative decorator. I'm not so familiar with Query internals yet to answer this. But I'll look into it. 
I've been thinking, that if caching is conditional on nothing structural about the query changing, and if we just want to support that pattern I mentioned above (where you have a global query object from which you build session-bound ones with with_session), it could be as cheap as taking the internals' identity as hash. That wouldn't work for the usual query building patterns, but then again, when you build a new object, you're already paying a cost similar to compiling, so caching would only really benefit the case where you cache the expression externally. In case I'm not clear, this would not be cached if I were to take id(internals) query(Blah).filter(blah).join(blah).first() But I don't care, because that's expensive on its own. This would: q = query(Blah).filter(blah).join(blah) ... q2 = q.with_session(S).params(blah).first() On Thu, May 30, 2013 at 4:05 PM, Michael Bayer mike...@zzzcomputing.com wrote: On May 30, 2013, at 2:28 PM, Claudio Freire klaussfre...@gmail.com wrote: On Thu, May 30, 2013 at 2:25 PM, Michael Bayer mike...@zzzcomputing.com wrote: If you want to work on a feature that is actually going to change SQLAlchemy, (and would that be before or after you finish #2720? :) ), it would be: After, I didn't forget, just real life real work priorities made me veer away from it. Since it was for 0.9, I judged I could safely delay 2720 a bit while I take care of work related priorities ;-) also, I find an overhaul to Query such that it's self-hashing a lot more interesting than #2720. It would be a much bigger performance savings and it would apply to other interpreters like pypy too.Replacements of tiny sections of code with C, not that interesting :) (redoing all the C in pyrex is more interesting but not necessarily a priority). The C extension is already done, and I think I sent the latest version, haven't I? The only thing remaining of 2720 is turning it all into pyrex code. 
-- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On May 30, 2013, at 2:28 PM, Claudio Freire klaussfre...@gmail.com wrote: On Thu, May 30, 2013 at 2:25 PM, Michael Bayer mike...@zzzcomputing.com wrote: If you want to work on a feature that is actually going to change SQLAlchemy, (and would that be before or after you finish #2720? :) ), it would be: After, I didn't forget, just real life real work priorities made me veer away from it. Since it was for 0.9, I judged I could safely delay 2720 a bit while I take care of work related priorities ;-) also, I find an overhaul to Query such that it's self-hashing a lot more interesting than #2720. It would be a much bigger performance savings and it would apply to other interpreters like pypy too. Replacements of tiny sections of code with C, not that interesting :) (redoing all the C in pyrex is more interesting but not necessarily a priority). -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On May 30, 2013, at 3:10 PM, Claudio Freire klaussfre...@gmail.com wrote: On Thu, May 30, 2013 at 3:45 PM, Michael Bayer mike...@zzzcomputing.com wrote: On May 30, 2013, at 2:28 PM, Claudio Freire klaussfre...@gmail.com wrote: 4. it would have a super crapload of very complete and clean unit tests. Ehm... I would imagine all the current tests involving Query would cover most of it. A few more cache-specific tests could be added surely, but only to check caching is indeed happening, correctness should be checked by existing tests already. well the current tests suffer very much from years of being integration tests and not unit tests. These tests would actually do a terrible job of testing this cache, as almost all tests use a brand new mapping, a brand new session, and emit just one query. none of the things that can go wrong with caching, such as the multiple result issue you're describing with the bake() recipe, would be exercised by current tests. Memory leaks, subtle changes in queries, all that stuff. It also sort of depends on how the feature comes out, how hard it will be to verify its correctness. Um... that might make the task a lot bigger than it should be. I'll have to look into it. if you look at the history of major feature adds, the actual cleverness to implement the feature is really like 10% of the effort. its about the tests, the documentation, the upgrade paths, all of that. If all I did was throw balls of clever at SQLAlchemy all day, and I have plenty, it would be a huge pile of colorful mud. All the support work is the price we pay for the joy of clever new features. Can't a Query generate its hash from its current state, without generative methods being called? Otherwise, the generative methods do run through a common system, which is the @_generative decorator. I'm not so familiar with Query internals yet to answer this. But I'll look into it. 
I've been thinking, that if caching is conditional on nothing structural about the query changing, and if we just want to support that pattern I mentioned above (where you have a global query object from which you build session-bound ones with with_session), it could be as cheap as taking the internals' identity as hash. That wouldn't work for the usual query building patterns, but then again, when you build a new object, you're already paying a cost similar to compiling, so caching would only really benefit the case where you cache the expression externally. In case I'm not clear, this would not be cached if I were to take id(internals) query(Blah).filter(blah).join(blah).first() But I don't care, because that's expensive on its own. result caching (because we're calling first()) is a whole different thing. the dogpile.cache recipe addresses that - its a widely used recipe and sure, that would make a great feature too, I keep it as a recipe so that people who use it have spent time understanding it. Recipes are for features that I don't have time to make 300% bulletproof, basically. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
On Thu, May 30, 2013 at 4:19 PM, Michael Bayer mike...@zzzcomputing.com wrote: In case I'm not clear, this would not be cached if I were to take id(internals) query(Blah).filter(blah).join(blah).first() But I don't care, because that's expensive on its own. result caching (because we're calling first()) is a whole different thing. the dogpile.cache recipe addresses that - its a widely used recipe and sure, that would make a great feature too, I keep it as a recipe so that people who use it have spent time understanding it. Oh no, I was referring to the query not the result. Results will never be cached with what I propose. -- You received this message because you are subscribed to the Google Groups sqlalchemy group. To unsubscribe from this group and stop receiving emails from it, send an email to sqlalchemy+unsubscr...@googlegroups.com. To post to this group, send email to sqlalchemy@googlegroups.com. Visit this group at http://groups.google.com/group/sqlalchemy?hl=en. For more options, visit https://groups.google.com/groups/opt_out.
Re: [sqlalchemy] Query and compiled_cache
a very brief example of this, which if you keep digging in you can see how tricky it gets (fast), is like: from sqlalchemy.sql import column, table t1 = table('t1', column('x'), column('y')) t2 = table('t1', column('x'), column('y')) t3 = table('t2', column('p'), column('r')) t4 = table('t2', column('r'), column('p')) assert t1._hash == t2._hash assert t3._hash != t4._hash the patch to produce the above is below. Note that Table/Column are easier to hash than table()/column(), since we treat the upper class versions as singletons. There is a lot more state that needs to be taken into account though, like the _annotations dictionary on every ClauseElement. In the case where an element doesn't define a fixed _hash, the usage of a new instance of that element in an ad-hoc Query means that whole Query can't be cached, because the element would have a different id each time (though dangerously, that id() can be reused when the original is garbage collected...that's an issue actually, we might instead need to use a counter for that case). diff --git a/lib/sqlalchemy/sql/expression.py b/lib/sqlalchemy/sql/expression.py index 5820cb1..d5de299 100644 --- a/lib/sqlalchemy/sql/expression.py +++ b/lib/sqlalchemy/sql/expression.py @@ -1669,6 +1669,7 @@ class ClauseElement(Visitable): c = self.__class__.__new__(self.__class__) c.__dict__ = self.__dict__.copy() +c.__dict__.pop('_hash', None) ClauseElement._cloned_set._reset(c) ColumnElement.comparator._reset(c) @@ -1681,6 +1682,10 @@ class ClauseElement(Visitable): return c +@util.memoized_property +def _hash(self): +return id(self) + @property def _constructor(self): return the 'constructor' for this ClauseElement. 
@@ -2421,6 +2426,10 @@ class ColumnCollection(util.OrderedProperties): self._data.update((c.key, c) for c in cols) self.__dict__['_all_cols'] = util.column_set(self) +@util.memoized_property +def _hash(self): +return hash(tuple(c._hash for c in self)) + def __str__(self): return repr([str(c) for c in self]) @@ -4432,6 +4441,17 @@ class ColumnClause(Immutable, ColumnElement): self.type = sqltypes.to_instance(type_) self.is_literal = is_literal +@util.memoized_property +def _hash(self): +return hash( +( +hash(self.key), +hash(self.table.name), # note using self.table here causes an endless loop +self.type._hash, +hash(self.is_literal) +) +) + def _compare_name_for_result(self, other): if self.is_literal or \ self.table is None or \ @@ -4586,6 +4606,15 @@ class TableClause(Immutable, FromClause): for c in columns: self.append_column(c) +@util.memoized_property +def _hash(self): +return hash( +( +hash(self.name), +self._columns._hash, +) +) + def _init_collections(self): pass diff --git a/lib/sqlalchemy/types.py b/lib/sqlalchemy/types.py index bfff053..16834d1 100644 --- a/lib/sqlalchemy/types.py +++ b/lib/sqlalchemy/types.py @@ -59,6 +59,10 @@ class TypeEngine(AbstractType): def __reduce__(self): return _reconstitute_comparator, (self.expr, ) +@property +def _hash(self): +return id(self) # default to the same value as __hash__() if a specific hash is not defined + hashable = True Flag, if False, means values from this type aren't hashable. On May 30, 2013, at 3:10 PM, Michael Bayer mike...@zzzcomputing.com wrote: my first 25 seconds of looking at this reveals that if you want to be able to generate a hash, this has to go all the way down to everything. 
query.filter(X == Y) means you need a hash for X == Y too.These hashes are definitely going to be determined using a traversal scheme for sure: q = X == Y q._magic_hash_value_() will ask X, operator.eq, Y, for their hash values (X and Y assuming they are Column objects are considered to be immutable, even though they can be copies of X and Y sometimes with different semantics), and combine them together. So some_select_statement._magic_hash_value_() would traverse all the way down as well. This is why object identity was a lot easier to work with. On May 30, 2013, at 3:05 PM, Michael Bayer mike...@zzzcomputing.com wrote: On May 30, 2013, at 2:28 PM, Claudio Freire klaussfre...@gmail.com wrote: On Thu, May 30, 2013 at 2:25 PM, Michael Bayer mike...@zzzcomputing.com wrote: If you want to work on a feature that is actually going to change SQLAlchemy, (and would that be before or after you finish #2720? :) ), it would be: After, I didn't forget, just real life real work
Re: [sqlalchemy] Query and compiled_cache
my next thought is, if something isn't distinctly hashable, then it should cancel being hashable entirely. this patch shows it using a symbol unhashable: https://gist.github.com/zzzeek/5681612 . If any construct has an unhashable inside of it, then that construct is unhashable too. The hashing thing really has to start as a core concept first. It's a big job but would be very helpful for caching scenarios and would allow us to build this feature on Query without too much difficulty. The nice thing about unhashable is that simple queries will be hashable, but as soon as complexity increases you'd start seeing unhashables come in, preventing us from caching something that isn't actually easy to cache. this could be really nice, could be a nice 0.9 focus, as I haven't found 0.9's big change yet (other than the 2to3 removal). On May 30, 2013, at 4:48 PM, Michael Bayer mike...@zzzcomputing.com wrote: a very brief example of this, which if you keep digging in you can see how tricky it gets (fast), is like: from sqlalchemy.sql import column, table t1 = table('t1', column('x'), column('y')) t2 = table('t1', column('x'), column('y')) t3 = table('t2', column('p'), column('r')) t4 = table('t2', column('r'), column('p')) assert t1._hash == t2._hash assert t3._hash != t4._hash the patch to produce the above is below. Note that Table/Column are easier to hash than table()/column(), since we treat the upper class versions as singletons. There is a lot more state that needs to be taken into account though, like the _annotations dictionary on every ClauseElement. In the case where an element doesn't define a fixed _hash, the usage of a new instance of that element in an ad-hoc Query means that whole Query can't be cached, because the element would have a different id each time (though dangerously, that id() can be reused when the original is garbage collected...that's an issue actually, we might instead need to use a counter for that case). 
diff --git a/lib/sqlalchemy/sql/expression.py b/lib/sqlalchemy/sql/expression.py index 5820cb1..d5de299 100644 --- a/lib/sqlalchemy/sql/expression.py +++ b/lib/sqlalchemy/sql/expression.py @@ -1669,6 +1669,7 @@ class ClauseElement(Visitable): c = self.__class__.__new__(self.__class__) c.__dict__ = self.__dict__.copy() +c.__dict__.pop('_hash', None) ClauseElement._cloned_set._reset(c) ColumnElement.comparator._reset(c) @@ -1681,6 +1682,10 @@ class ClauseElement(Visitable): return c +@util.memoized_property +def _hash(self): +return id(self) + @property def _constructor(self): return the 'constructor' for this ClauseElement. @@ -2421,6 +2426,10 @@ class ColumnCollection(util.OrderedProperties): self._data.update((c.key, c) for c in cols) self.__dict__['_all_cols'] = util.column_set(self) +@util.memoized_property +def _hash(self): +return hash(tuple(c._hash for c in self)) + def __str__(self): return repr([str(c) for c in self]) @@ -4432,6 +4441,17 @@ class ColumnClause(Immutable, ColumnElement): self.type = sqltypes.to_instance(type_) self.is_literal = is_literal +@util.memoized_property +def _hash(self): +return hash( +( +hash(self.key), +hash(self.table.name), # note using self.table here causes an endless loop +self.type._hash, +hash(self.is_literal) +) +) + def _compare_name_for_result(self, other): if self.is_literal or \ self.table is None or \ @@ -4586,6 +4606,15 @@ class TableClause(Immutable, FromClause): for c in columns: self.append_column(c) +@util.memoized_property +def _hash(self): +return hash( +( +hash(self.name), +self._columns._hash, +) +) + def _init_collections(self): pass diff --git a/lib/sqlalchemy/types.py b/lib/sqlalchemy/types.py index bfff053..16834d1 100644 --- a/lib/sqlalchemy/types.py +++ b/lib/sqlalchemy/types.py @@ -59,6 +59,10 @@ class TypeEngine(AbstractType): def __reduce__(self): return _reconstitute_comparator, (self.expr, ) +@property +def _hash(self): +return id(self) # default to the same value as __hash__() if a specific hash 
is not defined + hashable = True Flag, if False, means values from this type aren't hashable. On May 30, 2013, at 3:10 PM, Michael Bayer mike...@zzzcomputing.com wrote: my first 25 seconds of looking at this reveals that if you want to be able to generate a hash, this has to go all the way down to everything. query.filter(X == Y)