Re: SQLite and callbacks

2011-02-16 Thread Philip Martin
Stefan Sperling s...@elego.de writes:

 [[[
 Improve performance of svn proplist in a similar way as was done in r1039808.
 But, this time, avoid problems with callbacks invoked during sqlite
 transactions by storing results in a temporary table and invoking
 callbacks during a query on the temporary table.

 +read_props_recursive(svn_wc__db_t *db,
 + const char *local_abspath,
 + svn_boolean_t files_only,
 + svn_boolean_t immediates_only,
 + svn_wc__proplist_receiver_t receiver_func,
 + void *receiver_baton,
 + svn_cancel_func_t cancel_func,
 + void *cancel_baton,
 + apr_pool_t *scratch_pool)
  {
svn_wc__db_wcroot_t *wcroot;
 -  const char *local_relpath;
 -  const char *prev_child_relpath;
svn_sqlite__stmt_t *stmt;
 +  cache_props_baton_t baton;
svn_boolean_t have_row;
 -  apr_hash_t *props_per_child;
 -  apr_hash_t *files;
 -  apr_hash_t *not_present;
 -  apr_hash_index_t *hi;
 +  int row_number;
apr_pool_t *iterpool;
  
SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
SVN_ERR_ASSERT(receiver_func);
  
 -  SVN_ERR(svn_wc__db_wcroot_parse_local_abspath(wcroot, local_relpath, db,
 - local_abspath,
 - svn_sqlite__mode_readwrite,
 - scratch_pool, scratch_pool));
 +  SVN_ERR(svn_wc__db_wcroot_parse_local_abspath(wcroot, 
 baton.local_relpath,
 +db, local_abspath,
 +svn_sqlite__mode_readwrite,
 +scratch_pool, scratch_pool));
VERIFY_USABLE_WCROOT(wcroot);
  
 -  props_per_child = apr_hash_make(scratch_pool);
 -  not_present = apr_hash_make(scratch_pool);
 -  if (files_only)
 -files = apr_hash_make(scratch_pool);
 +  SVN_ERR(svn_sqlite__exec_statements(wcroot-sdb,
 +  STMT_CLEAR_NODE_PROPS_CACHE));
 +

As far as I can see there is only one cache, and so there has to be some
sort of serialisation to prevent multiple callers interfering with each
other.  For a write operation that is simple, the working copy locks
will only allow one operation at a time.  proplist doesn't take a lock,
do we need to serialise it somehow?  Should we be using views with
per-transaction names?  If we did how would we clear old views for
clients that exited unexpectedly?

I'm thinking of using something like this for recursive revert. It's
easy to delete WORKING/ACTUAL rows in a single query, but hard to
notify, particularly as the rows don't exist after the revert.  Since
revert is a write operation the working copy lock should serialise
things.


-- 
Philip


Re: SQLite and callbacks

2011-02-16 Thread Branko Čibej
On 16.02.2011 11:52, Philip Martin wrote:
 Stefan Sperling s...@elego.de writes:

 [[[
 Improve performance of svn proplist in a similar way as was done in r1039808.
 But, this time, avoid problems with callbacks invoked during sqlite
 transactions by storing results in a temporary table and invoking
 callbacks during a query on the temporary table.
 +read_props_recursive(svn_wc__db_t *db,
 + const char *local_abspath,
 + svn_boolean_t files_only,
 + svn_boolean_t immediates_only,
 + svn_wc__proplist_receiver_t receiver_func,
 + void *receiver_baton,
 + svn_cancel_func_t cancel_func,
 + void *cancel_baton,
 + apr_pool_t *scratch_pool)
  {
svn_wc__db_wcroot_t *wcroot;
 -  const char *local_relpath;
 -  const char *prev_child_relpath;
svn_sqlite__stmt_t *stmt;
 +  cache_props_baton_t baton;
svn_boolean_t have_row;
 -  apr_hash_t *props_per_child;
 -  apr_hash_t *files;
 -  apr_hash_t *not_present;
 -  apr_hash_index_t *hi;
 +  int row_number;
apr_pool_t *iterpool;
  
SVN_ERR_ASSERT(svn_dirent_is_absolute(local_abspath));
SVN_ERR_ASSERT(receiver_func);
  
 -  SVN_ERR(svn_wc__db_wcroot_parse_local_abspath(wcroot, local_relpath, db,
 - local_abspath,
 - svn_sqlite__mode_readwrite,
 - scratch_pool, scratch_pool));
 +  SVN_ERR(svn_wc__db_wcroot_parse_local_abspath(wcroot, 
 baton.local_relpath,
 +db, local_abspath,
 +svn_sqlite__mode_readwrite,
 +scratch_pool, 
 scratch_pool));
VERIFY_USABLE_WCROOT(wcroot);
  
 -  props_per_child = apr_hash_make(scratch_pool);
 -  not_present = apr_hash_make(scratch_pool);
 -  if (files_only)
 -files = apr_hash_make(scratch_pool);
 +  SVN_ERR(svn_sqlite__exec_statements(wcroot-sdb,
 +  STMT_CLEAR_NODE_PROPS_CACHE));
 +
 As far as I can see there is only one cache, and so there has to be some
 sort of serialisation to prevent multiple callers interfering with each
 other.  For a write operation that is simple, the working copy locks
 will only allow one operation at a time.  proplist doesn't take a lock,
 do we need to serialise it somehow?  Should we be using views with
 per-transaction names?  If we did how would we clear old views for
 clients that exited unexpectedly?

 I'm thinking of using something like this for recursive revert. It's
 easy to delete WORKING/ACTUAL rows in a single query, but hard to
 notify, particularly as the rows don't exist after the revert.  Since
 revert is a write operation the working copy lock should serialise
 things

The single cache is a temporary table, which has distinct per-connection
instances. If some other thread is using the same connection at the same
time, all bets are off anyway, because transactions are per-connection,
too. The cache is filled within a single sqlite transaction, so I assume
that the cached state is consistent. In fact, it's better than before,
when a recursive proplist would work its way throuth any number of
transactions, making the consistency of the result doubtful at best.

-- Brane



Re: SQLite and callbacks

2011-02-16 Thread Julian Foad
On Tue, 2011-02-15, Stefan Sperling wrote:
 [[[
 Improve performance of svn proplist in a similar way as was done in r1039808.
 But, this time, avoid problems with callbacks invoked during sqlite
 transactions by storing results in a temporary table and invoking
 callbacks during a query on the temporary table.
[...]
 ]]]

 Index: subversion/libsvn_wc/wc.h
 ===
 +-- STMT_CACHE_NODE_PROPS_RECURSIVE
 +CREATE TEMPORARY TABLE temp__node_props_cache AS
 +  SELECT local_relpath, kind, properties FROM nodes_current
 +  WHERE wc_id = ?1
 +AND (?2 = '' OR local_relpath = ?2 OR local_relpath LIKE ?2 || '/%')

For correct use of 'LIKE' with arbitrary file names, we need to escape
the pattern (and declare that here), which in turn means the unescaped
pattern and the escaped pattern need to be passed in as two separate
params, I think.  Same again in a similar query below.

 +AND local_relpath NOT IN (
 +  SELECT local_relpath FROM actual_node WHERE wc_id = ?1)

I wonder if this subexpression would be faster rewritten as something
like

   AND NOT (SELECT 1 FROM actual_node
WHERE wc_id = ?1 AND local_relpath = ?2)

(I'm not sure whether NOT (SELECT 1 ...) is the correct or best way to
say this selection is empty, but you get the idea.)


 +AND (presence = 'normal' OR presence = 'incomplete');
 +CREATE UNIQUE INDEX temp__node_props_cache_unique
 +  ON temp__node_props_cache (local_relpath);


- Julian




Re: SQLite and callbacks

2011-02-16 Thread Daniel Shahaf
Julian Foad wrote on Wed, Feb 16, 2011 at 18:25:35 +:
AND NOT (SELECT 1 FROM actual_node
 WHERE wc_id = ?1 AND local_relpath = ?2)
 
 (I'm not sure whether NOT (SELECT 1 ...) is the correct or best way to
 say this selection is empty, but you get the idea.)
 

AND 0 == (SELECT COUNT(*) ) ?


Re: SQLite and callbacks

2011-02-16 Thread Branko Čibej
On 16.02.2011 19:25, Julian Foad wrote:
 On Tue, 2011-02-15, Stefan Sperling wrote:
 [[[
 Improve performance of svn proplist in a similar way as was done in r1039808.
 But, this time, avoid problems with callbacks invoked during sqlite
 transactions by storing results in a temporary table and invoking
 callbacks during a query on the temporary table.
 [...]
 ]]]
 Index: subversion/libsvn_wc/wc.h
 ===
 +-- STMT_CACHE_NODE_PROPS_RECURSIVE
 +CREATE TEMPORARY TABLE temp__node_props_cache AS
 +  SELECT local_relpath, kind, properties FROM nodes_current
 +  WHERE wc_id = ?1
 +AND (?2 = '' OR local_relpath = ?2 OR local_relpath LIKE ?2 || '/%')
 For correct use of 'LIKE' with arbitrary file names, we need to escape
 the pattern (and declare that here), which in turn means the unescaped
 pattern and the escaped pattern need to be passed in as two separate
 params, I think.  Same again in a similar query below.

Good catch. We'll need an escape clause, too.

 +AND local_relpath NOT IN (
 +  SELECT local_relpath FROM actual_node WHERE wc_id = ?1)
 I wonder if this subexpression would be faster rewritten as something
 like

AND NOT (SELECT 1 FROM actual_node
 WHERE wc_id = ?1 AND local_relpath = ?2)

 (I'm not sure whether NOT (SELECT 1 ...) is the correct or best way to
 say this selection is empty, but you get the idea.)

My not very humble opinion -- we can play silly buggers trying to
optimize this bit of the query, but effort would be better spent in
merging NODES and ACTUAL_NODE, which in turn would allow us to drop the
second query altogether and halve the total time needed to populate the
cache table.

-- Brane



Re: SQLite and callbacks

2011-02-16 Thread Stefan Sperling
On Wed, Feb 16, 2011 at 02:26:51PM -0500, Mark Phippard wrote:
 2011/2/16 Branko Čibej br...@e-reka.si:
 
  My not very humble opinion -- we can play silly buggers trying to
  optimize this bit of the query, but effort would be better spent in
  merging NODES and ACTUAL_NODE, which in turn would allow us to drop the
  second query altogether and halve the total time needed to populate the
  cache table.
 
 Are you interested in doing this and just seeing if there are objections?
 
 You have brought it up a couple times and no one is responding.  If
 you think there might be objections maybe it needs a new thread?  If
 it delivers a performance win then I think we should do it.  I do not
 think null column values are going to waste enough disk space to be a
 concern.

+1


Re: SQLite and callbacks

2011-02-15 Thread Stefan Sperling
On Mon, Feb 14, 2011 at 09:48:35PM +0100, Branko Čibej wrote:
 On 14.02.2011 13:37, Stefan Sperling wrote:
  On Tue, Feb 08, 2011 at 11:50:36PM +0100, Branko Čibej wrote:
  Well, here it is, I fixed the thinko in the actual_props query and got
  all prop_tests to pass with this version.
  Just FYI, the patch doesn't apply to HEAD. I'll try to adjust it when
  I find time. But maybe you'll be quicker than me? :)
 
 It got clobbered by Hyrum's changes for pdh checking, if I read the
 conflict diff correctly. Should really be just the one conflict in
 wc_db.c, probably. I'll see if I can find time to update the patch, but
 I can't promise anything, got my hands full right now.

Updated version:

Index: subversion/libsvn_wc/props.c
===
--- subversion/libsvn_wc/props.c(revision 1070853)
+++ subversion/libsvn_wc/props.c(working copy)
@@ -1711,6 +1711,7 @@ read_dir_props(const char *local_abspath,
   SVN_ERR(svn_wc__db_read_props_of_immediates(b-db, local_abspath,
   b-receiver_func,
   b-receiver_baton,
+  NULL, NULL,
   scratch_pool));
   return SVN_NO_ERROR;
 }
@@ -1725,47 +1726,41 @@ svn_wc__prop_list_recursive(svn_wc_context_t *wc_c
 void *cancel_baton,
 apr_pool_t *scratch_pool)
 {
-  struct read_dir_props_baton read_dir_baton;
-
-  if (depth = svn_depth_immediates)
+  switch (depth)
 {
-  apr_hash_t *props;
+case svn_depth_empty:
+  {
+apr_hash_t *props;
 
-  SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
-scratch_pool, scratch_pool));
-  if (receiver_func  props  apr_hash_count(props)  0)
-SVN_ERR((*receiver_func)(receiver_baton, local_abspath, props,
- scratch_pool));
-  if (depth == svn_depth_empty)
-return SVN_NO_ERROR;
-}
-
-  if (depth == svn_depth_files)
-{
+SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
+  scratch_pool, scratch_pool));
+if (receiver_func  props  apr_hash_count(props)  0)
+  SVN_ERR((*receiver_func)(receiver_baton, local_abspath, props,
+   scratch_pool));
+  }
+  break;
+case  svn_depth_files:
   SVN_ERR(svn_wc__db_read_props_of_files(wc_ctx-db, local_abspath,
  receiver_func, receiver_baton,
+ cancel_func, cancel_baton,
  scratch_pool));
-  return SVN_NO_ERROR;
-}
-
-  if (depth == svn_depth_immediates)
-{
+  break;
+case svn_depth_immediates:
   SVN_ERR(svn_wc__db_read_props_of_immediates(wc_ctx-db, local_abspath,
-  receiver_func,
-  receiver_baton,
+  receiver_func, 
receiver_baton,
+  cancel_func, cancel_baton,
   scratch_pool));
-  return SVN_NO_ERROR;
+  break;
+case svn_depth_infinity:
+  SVN_ERR(svn_wc__db_read_props_recursive(wc_ctx-db, local_abspath,
+  receiver_func, receiver_baton,
+  cancel_func, cancel_baton,
+  scratch_pool));
+  break;
+default:
+  SVN_ERR_MALFUNCTION();
 }
 
-  read_dir_baton.db = wc_ctx-db;
-  read_dir_baton.root_abspath = local_abspath;
-  read_dir_baton.receiver_func = receiver_func;
-  read_dir_baton.receiver_baton = receiver_baton;
-
-  SVN_ERR(svn_wc__internal_walk_children(wc_ctx-db, local_abspath, FALSE,
- read_dir_props, read_dir_baton,
- depth, cancel_func, cancel_baton,
- scratch_pool));
   return SVN_NO_ERROR;
 }
 
Index: subversion/libsvn_wc/wc-queries.sql
===
--- subversion/libsvn_wc/wc-queries.sql (revision 1070853)
+++ subversion/libsvn_wc/wc-queries.sql (working copy)
@@ -741,7 +741,62 @@ SELECT 1 FROM nodes WHERE op_depth  0;
 UPDATE nodes SET checksum = ?4
 WHERE wc_id = ?1 AND local_relpath = ?2 AND op_depth = ?3;
 
+/* - */
+/* PROOF OF CONCEPT: Complex queries for callback walks, caching results
+ in a temporary table. */
 
+-- STMT_CLEAR_NODE_PROPS_CACHE
+DROP TABLE IF EXISTS temp__node_props_cache;
+
+-- 

Re: SQLite and callbacks

2011-02-15 Thread Stefan Sperling
On Tue, Feb 15, 2011 at 01:30:45PM +0100, Stefan Sperling wrote:
 On Mon, Feb 14, 2011 at 09:48:35PM +0100, Branko Čibej wrote:
  On 14.02.2011 13:37, Stefan Sperling wrote:
   On Tue, Feb 08, 2011 at 11:50:36PM +0100, Branko Čibej wrote:
   Well, here it is, I fixed the thinko in the actual_props query and got
   all prop_tests to pass with this version.
   Just FYI, the patch doesn't apply to HEAD. I'll try to adjust it when
   I find time. But maybe you'll be quicker than me? :)
  
  It got clobbered by Hyrum's changes for pdh checking, if I read the
  conflict diff correctly. Should really be just the one conflict in
  wc_db.c, probably. I'll see if I can find time to update the patch, but
  I can't promise anything, got my hands full right now.
 
 Updated version:

The patch was missing a bump of SVN_WC__VERSION so auto-upgrade didn't work.
Update below.

I think we should commit this and continue working on it.
Is it OK to bump the WC format now?

Here's a log message:

[[[
Improve performance of svn proplist in a similar way as was done in r1039808.
But, this time, avoid problems with callbacks invoked during sqlite
transactions by storing results in a temporary table and invoking
callbacks during a query on the temporary table.

Patch by: brane

* subversion/libsvn_wc/props.c
  (read_dir_props): Adjust caller of svn_wc__db_read_props_of_immediates().
  (svn_wc__prop_list_recursive): Reimplement using new wc_db APIs.

* subversion/libsvn_wc/wc.h
  (SVN_WC__VERSION): Bump to 25 (format 25 adds NODES_CURRENT view).

* subversion/libsvn_wc/wc-queries.sql
  (STMT_CLEAR_NODE_PROPS_CACHE, STMT_CACHE_NODE_PROPS_RECURSIVE,
   STMT_CACHE_ACTUAL_PROPS_RECURSIVE, STMT_CACHE_NODE_PROPS_OF_CHILDREN,
   STMT_CACHE_ACTUAL_PROPS_OF_CHILDREN,
   STMT_SELECT_RELEVANT_PROPS_FROM_CACHE): New queries.

* subversion/libsvn_wc/wc-metadata.sql
  (NODES_CURRENT): A view on the NODES table that shows only the nodes
   with the highest op_depth.
  (STMT_UPGRADE_TO_25): Upgrade statement which creates NODES_CURRENT.

* subversion/libsvn_wc/wc_db.c
  (maybe_add_child_props, read_props_of_children): Remove.
  (cache_props_baton_t): New baton type used by cache_props_recursive().
  (cache_props_recursive): New. Helper for read_props_recursive().
  (read_props_recursive): New. Recursively reads properties from wc_db.
   Recursion can be limited by depth. Uses the new queries to create
   a temporary table containing information about properties and pass
   the results to the receiver callback.
  (svn_wc__db_read_props_of_files,
   svn_wc__db_read_props_of_immediates): Call read_props_recursive() instead
   of read_props_of_children(). Add support for cancellation.

* subversion/libsvn_wc/wc_db.h
  (svn_wc__db_read_props_of_files,
   svn_wc__db_read_props_of_immediates): Update declarations.
  (svn_wc__db_read_props_recursive): Declare.

* subversion/libsvn_wc/upgrade.c
  (bump_to_25): New.
  (svn_wc__upgrade_sdb): Upgrade to format 25.
]]]

Index: subversion/libsvn_wc/props.c
===
--- subversion/libsvn_wc/props.c(revision 1070853)
+++ subversion/libsvn_wc/props.c(working copy)
@@ -1711,6 +1711,7 @@ read_dir_props(const char *local_abspath,
   SVN_ERR(svn_wc__db_read_props_of_immediates(b-db, local_abspath,
   b-receiver_func,
   b-receiver_baton,
+  NULL, NULL,
   scratch_pool));
   return SVN_NO_ERROR;
 }
@@ -1725,47 +1726,41 @@ svn_wc__prop_list_recursive(svn_wc_context_t *wc_c
 void *cancel_baton,
 apr_pool_t *scratch_pool)
 {
-  struct read_dir_props_baton read_dir_baton;
-
-  if (depth = svn_depth_immediates)
+  switch (depth)
 {
-  apr_hash_t *props;
+case svn_depth_empty:
+  {
+apr_hash_t *props;
 
-  SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
-scratch_pool, scratch_pool));
-  if (receiver_func  props  apr_hash_count(props)  0)
-SVN_ERR((*receiver_func)(receiver_baton, local_abspath, props,
- scratch_pool));
-  if (depth == svn_depth_empty)
-return SVN_NO_ERROR;
-}
-
-  if (depth == svn_depth_files)
-{
+SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
+  scratch_pool, scratch_pool));
+if (receiver_func  props  apr_hash_count(props)  0)
+  SVN_ERR((*receiver_func)(receiver_baton, local_abspath, props,
+   scratch_pool));
+  }
+  break;
+case  svn_depth_files:
   SVN_ERR(svn_wc__db_read_props_of_files(wc_ctx-db, local_abspath,
  receiver_func, receiver_baton,
+ 

Re: SQLite and callbacks

2011-02-15 Thread Hyrum K Wright
On Tue, Feb 15, 2011 at 2:53 PM, Stefan Sperling s...@elego.de wrote:
 On Tue, Feb 15, 2011 at 01:30:45PM +0100, Stefan Sperling wrote:
 On Mon, Feb 14, 2011 at 09:48:35PM +0100, Branko Čibej wrote:
  On 14.02.2011 13:37, Stefan Sperling wrote:
   On Tue, Feb 08, 2011 at 11:50:36PM +0100, Branko Čibej wrote:
   Well, here it is, I fixed the thinko in the actual_props query and got
   all prop_tests to pass with this version.
   Just FYI, the patch doesn't apply to HEAD. I'll try to adjust it when
   I find time. But maybe you'll be quicker than me? :)
 
  It got clobbered by Hyrum's changes for pdh checking, if I read the
  conflict diff correctly. Should really be just the one conflict in
  wc_db.c, probably. I'll see if I can find time to update the patch, but
  I can't promise anything, got my hands full right now.

 Updated version:

 The patch was missing a bump of SVN_WC__VERSION so auto-upgrade didn't work.
 Update below.

 I think we should commit this and continue working on it.
 Is it OK to bump the WC format now?

I've not reviewed the patch, but to the question about bumping the wc
format, I think you're safe to do so (so long as the upgrade code is
properly implemented / tested, etc).

The current working copy much more gracefully handles upgrades, and is
*should* Just Work.

-Hyrum


 Here's a log message:

 [[[
 Improve performance of svn proplist in a similar way as was done in r1039808.
 But, this time, avoid problems with callbacks invoked during sqlite
 transactions by storing results in a temporary table and invoking
 callbacks during a query on the temporary table.

 Patch by: brane

 * subversion/libsvn_wc/props.c
  (read_dir_props): Adjust caller of svn_wc__db_read_props_of_immediates().
  (svn_wc__prop_list_recursive): Reimplement using new wc_db APIs.

 * subversion/libsvn_wc/wc.h
  (SVN_WC__VERSION): Bump to 25 (format 25 adds NODES_CURRENT view).

 * subversion/libsvn_wc/wc-queries.sql
  (STMT_CLEAR_NODE_PROPS_CACHE, STMT_CACHE_NODE_PROPS_RECURSIVE,
   STMT_CACHE_ACTUAL_PROPS_RECURSIVE, STMT_CACHE_NODE_PROPS_OF_CHILDREN,
   STMT_CACHE_ACTUAL_PROPS_OF_CHILDREN,
   STMT_SELECT_RELEVANT_PROPS_FROM_CACHE): New queries.

 * subversion/libsvn_wc/wc-metadata.sql
  (NODES_CURRENT): A view on the NODES table that shows only the nodes
   with the highest op_depth.
  (STMT_UPGRADE_TO_25): Upgrade statement which creates NODES_CURRENT.

 * subversion/libsvn_wc/wc_db.c
  (maybe_add_child_props, read_props_of_children): Remove.
  (cache_props_baton_t): New baton type used by cache_props_recursive().
  (cache_props_recursive): New. Helper for read_props_recursive().
  (read_props_recursive): New. Recursively reads properties from wc_db.
   Recursion can be limited by depth. Uses the new queries to create
   a temporary table containing information about properties and pass
   the results to the receiver callback.
  (svn_wc__db_read_props_of_files,
   svn_wc__db_read_props_of_immediates): Call read_props_recursive() instead
   of read_props_of_children(). Add support for cancellation.

 * subversion/libsvn_wc/wc_db.h
  (svn_wc__db_read_props_of_files,
   svn_wc__db_read_props_of_immediates): Update declarations.
  (svn_wc__db_read_props_recursive): Declare.

 * subversion/libsvn_wc/upgrade.c
  (bump_to_25): New.
  (svn_wc__upgrade_sdb): Upgrade to format 25.
 ]]]

 Index: subversion/libsvn_wc/props.c
 ===
 --- subversion/libsvn_wc/props.c        (revision 1070853)
 +++ subversion/libsvn_wc/props.c        (working copy)
 @@ -1711,6 +1711,7 @@ read_dir_props(const char *local_abspath,
   SVN_ERR(svn_wc__db_read_props_of_immediates(b-db, local_abspath,
                                               b-receiver_func,
                                               b-receiver_baton,
 +                                              NULL, NULL,
                                               scratch_pool));
   return SVN_NO_ERROR;
  }
 @@ -1725,47 +1726,41 @@ svn_wc__prop_list_recursive(svn_wc_context_t *wc_c
                             void *cancel_baton,
                             apr_pool_t *scratch_pool)
  {
 -  struct read_dir_props_baton read_dir_baton;
 -
 -  if (depth = svn_depth_immediates)
 +  switch (depth)
     {
 -      apr_hash_t *props;
 +    case svn_depth_empty:
 +      {
 +        apr_hash_t *props;

 -      SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
 -                                    scratch_pool, scratch_pool));
 -      if (receiver_func  props  apr_hash_count(props)  0)
 -        SVN_ERR((*receiver_func)(receiver_baton, local_abspath, props,
 -                                 scratch_pool));
 -      if (depth == svn_depth_empty)
 -        return SVN_NO_ERROR;
 -    }
 -
 -  if (depth == svn_depth_files)
 -    {
 +        SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
 +                                      scratch_pool, scratch_pool));
 +        if (receiver_func  props  

Re: SQLite and callbacks

2011-02-15 Thread Stefan Sperling
On Tue, Feb 15, 2011 at 03:35:43PM +, Hyrum K Wright wrote:
 I've not reviewed the patch, but to the question about bumping the wc
 format, I think you're safe to do so (so long as the upgrade code is
 properly implemented / tested, etc).
 
 The current working copy much more gracefully handles upgrades, and is
 *should* Just Work.

It does Just Work here... :)


Re: SQLite and callbacks

2011-02-14 Thread Stefan Sperling
On Tue, Feb 08, 2011 at 11:50:36PM +0100, Branko Čibej wrote:
 Well, here it is, I fixed the thinko in the actual_props query and got
 all prop_tests to pass with this version.

Just FYI, the patch doesn't apply to HEAD. I'll try to adjust it when
I find time. But maybe you'll be quicker than me? :)


Re: SQLite and callbacks

2011-02-14 Thread Branko Čibej
On 14.02.2011 13:37, Stefan Sperling wrote:
 On Tue, Feb 08, 2011 at 11:50:36PM +0100, Branko Čibej wrote:
 Well, here it is, I fixed the thinko in the actual_props query and got
 all prop_tests to pass with this version.
 Just FYI, the patch doesn't apply to HEAD. I'll try to adjust it when
 I find time. But maybe you'll be quicker than me? :)

It got clobbered by Hyrum's changes for pdh checking, if I read the
conflict diff correctly. Should really be just the one conflict in
wc_db.c, probably. I'll see if I can find time to update the patch, but
I can't promise anything, got my hands full right now.

-- Brane


Re: SQLite and callbacks

2011-02-09 Thread Ivan Zhakov
On Tue, Feb 8, 2011 at 12:50, Bert Huijben b...@qqmail.nl wrote:


 -Original Message-
 From: Branko Čibej [mailto:br...@xbc.nu] On Behalf Of Branko Cibej
 Sent: dinsdag 8 februari 2011 4:39
 To: dev@subversion.apache.org
 Subject: Re: SQLite and callbacks

 On 07.02.2011 21:51, Stefan Sperling wrote:
  A lot of wc databases out there will be
  so small that the user will hardly notice the memory increase.
  All we'd be doing is allowing sqlite to flush data to disk if needed.
  Even with a temporary table backed by a file, most operations happen
 in
  memory. Either in buffers managed by sqlite or the operating system's
  buffer cache (until sqlite does an fsync). So for small databases it
  shouldn't make a difference.

 On NTFS just creating a new 0 byte tempfile requires an fsync (and probably a 
 few in a row),
On Windows there is FILE_ATTRIBUTE_TEMPORARY [1] attribute, which
prevents file to be written to disk:
[[[
Specifying the FILE_ATTRIBUTE_TEMPORARY attribute causes file systems
to avoid writing data back to mass storage if sufficient cache memory
is available, because an application deletes a temporary file after a
handle is closed. In that case, the system can entirely avoid writing
the data. Although it does not directly control data caching in the
same way as the previously mentioned flags, the
FILE_ATTRIBUTE_TEMPORARY attribute does tell the system to hold as
much as possible in the system cache without writing and therefore may
be of concern for certain applications.
]]]

May be we should submit a patch to SQLite to use this attribute?

[1] http://msdn.microsoft.com/en-us/library/aa363858%28v=vs.85%29.aspx


-- 
Ivan Zhakov
VisualSVN Team


Re: SQLite and callbacks

2011-02-09 Thread Ivan Zhakov
On Wed, Feb 9, 2011 at 17:40, Ivan Zhakov i...@visualsvn.com wrote:
 On Tue, Feb 8, 2011 at 12:50, Bert Huijben b...@qqmail.nl wrote:
 -Original Message-
 From: Branko Čibej [mailto:br...@xbc.nu] On Behalf Of Branko Cibej
 Sent: dinsdag 8 februari 2011 4:39
 To: dev@subversion.apache.org
 Subject: Re: SQLite and callbacks

 On 07.02.2011 21:51, Stefan Sperling wrote:
  A lot of wc databases out there will be
  so small that the user will hardly notice the memory increase.
  All we'd be doing is allowing sqlite to flush data to disk if needed.
  Even with a temporary table backed by a file, most operations happen
 in
  memory. Either in buffers managed by sqlite or the operating system's
  buffer cache (until sqlite does an fsync). So for small databases it
  shouldn't make a difference.

 On NTFS just creating a new 0 byte tempfile requires an fsync (and probably 
 a few in a row),
 On Windows there is FILE_ATTRIBUTE_TEMPORARY [1] attribute, which
 prevents file to be written to disk:
 [[[
 Specifying the FILE_ATTRIBUTE_TEMPORARY attribute causes file systems
 to avoid writing data back to mass storage if sufficient cache memory
 is available, because an application deletes a temporary file after a
 handle is closed. In that case, the system can entirely avoid writing
 the data. Although it does not directly control data caching in the
 same way as the previously mentioned flags, the
 FILE_ATTRIBUTE_TEMPORARY attribute does tell the system to hold as
 much as possible in the system cache without writing and therefore may
 be of concern for certain applications.
 ]]]

 May be we should submit a patch to SQLite to use this attribute?

 [1] http://msdn.microsoft.com/en-us/library/aa363858%28v=vs.85%29.aspx


Well, I've checked SQLite source code and it seems SQLite uses
FILE_ATTRIBUTE_TEMPORARY attribute when creating temporary database,
database journal and other temporary files. So temporary tables backed
up by file should be serious problem in theory.

-- 
Ivan Zhakov
VisualSVN Team


Re: SQLite and callbacks

2011-02-09 Thread Stefan Sperling
On Wed, Feb 09, 2011 at 10:56:35PM +0300, Ivan Zhakov wrote:
 On Wed, Feb 9, 2011 at 17:40, Ivan Zhakov i...@visualsvn.com wrote:
  On Windows there is FILE_ATTRIBUTE_TEMPORARY [1] attribute, which
  prevents file to be written to disk:
  [[[
  Specifying the FILE_ATTRIBUTE_TEMPORARY attribute causes file systems
  to avoid writing data back to mass storage if sufficient cache memory
  is available, because an application deletes a temporary file after a
  handle is closed. In that case, the system can entirely avoid writing
  the data. Although it does not directly control data caching in the
  same way as the previously mentioned flags, the
  FILE_ATTRIBUTE_TEMPORARY attribute does tell the system to hold as
  much as possible in the system cache without writing and therefore may
  be of concern for certain applications.
  ]]]
 
  May be we should submit a patch to SQLite to use this attribute?
 
  [1] http://msdn.microsoft.com/en-us/library/aa363858%28v=vs.85%29.aspx
 
 
 Well, I've checked SQLite source code and it seems SQLite uses
 FILE_ATTRIBUTE_TEMPORARY attribute when creating temporary database,
 database journal and other temporary files. So temporary tables backed
 up by file should be serious problem in theory.

Did you mean to say that it should *not* be a serious problem?


Re: SQLite and callbacks

2011-02-09 Thread Ivan Zhakov
On Wed, Feb 9, 2011 at 23:06, Stefan Sperling s...@elego.de wrote:
 On Wed, Feb 09, 2011 at 10:56:35PM +0300, Ivan Zhakov wrote:
 On Wed, Feb 9, 2011 at 17:40, Ivan Zhakov i...@visualsvn.com wrote:
  On Windows there is FILE_ATTRIBUTE_TEMPORARY [1] attribute, which
  prevents file to be written to disk:
  [[[
  Specifying the FILE_ATTRIBUTE_TEMPORARY attribute causes file systems
  to avoid writing data back to mass storage if sufficient cache memory
  is available, because an application deletes a temporary file after a
  handle is closed. In that case, the system can entirely avoid writing
  the data. Although it does not directly control data caching in the
  same way as the previously mentioned flags, the
  FILE_ATTRIBUTE_TEMPORARY attribute does tell the system to hold as
  much as possible in the system cache without writing and therefore may
  be of concern for certain applications.
  ]]]
 
  May be we should submit a patch to SQLite to use this attribute?
 
  [1] http://msdn.microsoft.com/en-us/library/aa363858%28v=vs.85%29.aspx
 
 
 Well, I've checked SQLite source code and it seems SQLite uses
 FILE_ATTRIBUTE_TEMPORARY attribute when creating temporary database,
 database journal and other temporary files. So temporary tables backed
 up by file should be serious problem in theory.

 Did you mean to say that it should *not* be a serious problem?

Oops, sorry for typo. Yes, I mean that temporary tables backed up by
file should *NOT* be serious problem in theory.

-- 
Ivan Zhakov
VisualSVN Team


RE: SQLite and callbacks

2011-02-08 Thread Bert Huijben


 -Original Message-
 From: Branko Čibej [mailto:br...@xbc.nu] On Behalf Of Branko Cibej
 Sent: dinsdag 8 februari 2011 4:39
 To: dev@subversion.apache.org
 Subject: Re: SQLite and callbacks
 
 On 07.02.2011 21:51, Stefan Sperling wrote:
  A lot of wc databases out there will be
  so small that the user will hardly notice the memory increase.
  All we'd be doing is allowing sqlite to flush data to disk if needed.
  Even with a temporary table backed by a file, most operations happen
 in
  memory. Either in buffers managed by sqlite or the operating system's
  buffer cache (until sqlite does an fsync). So for small databases it
  shouldn't make a difference.

On NTFS just creating a new 0 byte tempfile requires an fsync (and probably a 
few in a row), so using the in memory buffers instead of a tempfile improved 
our SQLite performance significantly (and not only on Windows). Assuming using 
tempfiles was cheap was one of the major slowdowns of WC-1.0 on Windows.


Please don't suggest 'just making it file backed' as an easy feature if you 
only measured it on a non journaling filesystem.


With our current query scheme on 'common operations', switching to file based 
temporary storage will require rewriting almost every sql operation and how we 
use it to have release acceptable performance on Windows. A simple 'OR' or 
using a subquery may introduces a temptable.

In this thread we are looking at property storage... which was probably always 
slower than it is today. 

Yes, we can improve that, but please don't suggest introducing a 30% slowdown 
on the more common code paths like those used in 'svn status' or 'svn update' 
to improve reading many properties, without measuring the consequences.

Bert



Re: SQLite and callbacks

2011-02-08 Thread Stefan Sperling
On Tue, Feb 08, 2011 at 10:50:46AM +0100, Bert Huijben wrote:
 
 
  -Original Message-
  From: Branko Čibej [mailto:br...@xbc.nu] On Behalf Of Branko Cibej
  Sent: dinsdag 8 februari 2011 4:39
  To: dev@subversion.apache.org
  Subject: Re: SQLite and callbacks
  
  On 07.02.2011 21:51, Stefan Sperling wrote:
   A lot of wc databases out there will be
   so small that the user will hardly notice the memory increase.
   All we'd be doing is allowing sqlite to flush data to disk if needed.
   Even with a temporary table backed by a file, most operations happen
  in
   memory. Either in buffers managed by sqlite or the operating system's
   buffer cache (until sqlite does an fsync). So for small databases it
   shouldn't make a difference.
 
 On NTFS just creating a new 0 byte tempfile requires an fsync (and probably a 
 few in a row), so using the in memory buffers instead of a tempfile improved 
 our SQLite performance significantly (and not only on Windows). Assuming 
 using tempfiles was cheap was one of the major slowdowns of WC-1.0 on Windows.
 
 
 Please don't suggest 'just making it file backed' as an easy feature if you 
 only measured it on a non journaling filesystem.
 
 
 With our current query scheme on 'common operations', switching to file based 
 temporary storage will require rewriting almost every sql operation and how 
 we use it to have release acceptable performance on Windows. A simple 'OR' or 
 using a subquery may introduces a temptable.
 
 In this thread we are looking at property storage... which was probably 
 always slower than it is today. 
 
 Yes, we can improve that, but please don't suggest introducing a 30% slowdown 
 on the more common code paths like those used in 'svn status' or 'svn update' 
 to improve reading many properties, without measuring the consequences.

I don't think that status will be released in its current form.
It does way too many queries.
We'll need to look into optmizing it using queries with temporary
tables, like Branko suggested for proplist.

Also, there is no need to use the same default on all platforms.
We can use memory-backed temp-tables on windows and file-backed
temp-tables on unix if that's what we need.


RE: SQLite and callbacks

2011-02-08 Thread Bert Huijben


 -Original Message-
 From: Stefan Sperling [mailto:s...@elego.de]
 Sent: dinsdag 8 februari 2011 15:18
 To: Bert Huijben
 Cc: 'Branko Čibej'; dev@subversion.apache.org
 Subject: Re: SQLite and callbacks
 
 On Tue, Feb 08, 2011 at 10:50:46AM +0100, Bert Huijben wrote:
 
 
   -Original Message-
   From: Branko Čibej [mailto:br...@xbc.nu] On Behalf Of Branko Cibej
   Sent: dinsdag 8 februari 2011 4:39
   To: dev@subversion.apache.org
   Subject: Re: SQLite and callbacks
  
   On 07.02.2011 21:51, Stefan Sperling wrote:
A lot of wc databases out there will be
so small that the user will hardly notice the memory increase.
All we'd be doing is allowing sqlite to flush data to disk if
 needed.
Even with a temporary table backed by a file, most operations
 happen
   in
memory. Either in buffers managed by sqlite or the operating
 system's
buffer cache (until sqlite does an fsync). So for small databases
 it
shouldn't make a difference.
 
  On NTFS just creating a new 0 byte tempfile requires an fsync (and
 probably a few in a row), so using the in memory buffers instead of a
 tempfile improved our SQLite performance significantly (and not only on
 Windows). Assuming using tempfiles was cheap was one of the major
 slowdowns of WC-1.0 on Windows.
 
 
  Please don't suggest 'just making it file backed' as an easy feature
 if you only measured it on a non journaling filesystem.
 
 
  With our current query scheme on 'common operations', switching to
 file based temporary storage will require rewriting almost every sql
 operation and how we use it to have release acceptable performance on
 Windows. A simple 'OR' or using a subquery may introduces a temptable.
 
  In this thread we are looking at property storage... which was
 probably always slower than it is today.
 
  Yes, we can improve that, but please don't suggest introducing a 30%
 slowdown on the more common code paths like those used in 'svn status'
 or 'svn update' to improve reading many properties, without measuring
 the consequences.
 
 I don't think that status will be released in its current form.
 It does way too many queries.
 We'll need to look into optmizing it using queries with temporary
 tables, like Branko suggested for proplist.

There is nobody actively working on status and there are no open issues on 
status to block branching... 

So if status is still a show-stopper we should focus on that instead of trying 
to improve 'svn proplist -R', which is not a very common operation in svn. 
(Merge works per node, so it doesn't benefit from the performance enhancements 
:( )

As things are today it looks like status will be released in its current form 
for 1.7.
I don't see a problem with the current status performance from my perspective; 
unless somebody decides to just disable in memory temptables without profiling 
to fix some other issues in a different place.

I really wish we could just decide this per query as most current queries only 
use temptables for 0-5 rows, but I don't see that as an easy option. Maybe one 
of the SQLite devs has a solution here?

An even better solution would be that SQLite tries to do things completely in 
memory and only *creates* a tempfile when needed. (It seems it now creates the 
file anyway; but doesn't use it until needed. Introducing a heavy performance 
penalty on NTFS, but not on extXfs)

 Also, there is no need to use the same default on all platforms.
 We can use memory-backed temp-tables on windows and file-backed
 temp-tables on unix if that's what we need.

Per platform (or possibly per filesystem) defaults also need profile details to 
make the right decisions. (Ram is not cheap in AnkhSVN as many users fill up 
their 4 GB in Visual Studio, but neither are tempfiles)

I don't have a problem with choosing file backed temptables over memory backed, 
but I do have an issue with doing it for theoretical reasons which are only 
tested under very specific circumstances. And a recursive proplist is not a 
very common and/or performance critical subversion operation from my 
perspective.

Bert



Re: SQLite and callbacks

2011-02-08 Thread Stefan Sperling
On Tue, Feb 08, 2011 at 04:34:52PM +0100, Bert Huijben wrote:
 There is nobody actively working on status and there are no open
 issues on status to block branching... 

There's the general wc-ng performance issue (but I don't think it has
an issue number attached to it right now).

 So if status is still a show-stopper we should focus on that instead
 of trying to improve 'svn proplist -R', which is not a very common
 operation in svn. (Merge works per node, so it doesn't benefit from
 the performance enhancements :( )

I started working on proplist because it's pretty simple and therefore
a good way to get going. My end goal is not to improve proplist.
I want to understand where wc-ng performance problems come from and
how to fix them.

I do intend to look at other subcommands once proplist has been dealt with.

 
 As things are today it looks like status will be released in its current form 
 for 1.7.
 I don't see a problem with the current status performance from my 
 perspective; unless somebody decides to just disable in memory temptables 
 without profiling to fix some other issues in a different place.

Ah, so status performs well using per-node queries?
I thought it was still worse compared to 1.6.x for some use cases.
See http://svn.haxx.se/dev/archive-2010-09/0526.shtml
I think we should try to get trunk to perform at least as well as 1.6.x
for all uses cases before release.

 I don't have a problem with choosing file backed temptables over
 memory backed, but I do have an issue with doing it for theoretical
 reasons which are only tested under very specific circumstances. And a
 recursive proplist is not a very common and/or performance critical
 subversion operation from my perspective.

Of course proplist isn't very critical. But that's not the point.

The point is that Subversion should be written in a way that prevents
it from running out of memory when it operates on large working copies.
What if users run into problems with large working copies with 1.7?
Have we tested what happens with very large working copies?

Code that blows up if the input data set is too large is not acceptable.
Using file-backed temporary tables might be slower, but it's a safe default.


Re: SQLite and callbacks

2011-02-08 Thread Philip Martin
Stefan Sperling s...@elego.de writes:

 Ah, so status performs well using per-node queries?
 I thought it was still worse compared to 1.6.x for some use cases.
 See http://svn.haxx.se/dev/archive-2010-09/0526.shtml
 I think we should try to get trunk to perform at least as well as 1.6.x
 for all uses cases before release.

Status switched to per-directory queries in r1022931 (2010-10-15).  In
some cases 1.7 is faster than 1.6, in other cases it is a bit
slower.  Performance on network disks is still a concern.

-- 
Philip


Re: SQLite and callbacks

2011-02-08 Thread Branko Čibej
On 08.02.2011 16:34, Bert Huijben wrote:
 An even better solution would be that SQLite tries to do things completely in 
 memory and only *creates* a tempfile when needed. (It seems it now creates 
 the file anyway; but doesn't use it until needed. Introducing a heavy 
 performance penalty on NTFS, but not on extXfs)

You mentioned testing on journalled filesystems. Maybe you don't
consider ext3 and ext4 to be journalled, but my tests were done on
journalled HFS+ on Mac OS.

Yes, creating a temporary file is moderately expensive. But in the
approach I showed, you really only create one temporary table and/or
database per WC operation, not 50 zillion.

That said, I agree that a lot more testing and measuring needs to be
done. And after all, a memory-backed temporary table is in the worst
case backed by swap space.

-- Brane



Re: SQLite and callbacks

2011-02-08 Thread Hyrum K Wright
On Tue, Feb 8, 2011 at 4:15 PM, Stefan Sperling s...@elego.de wrote:
 On Tue, Feb 08, 2011 at 04:34:52PM +0100, Bert Huijben wrote:
 There is nobody actively working on status and there are no open
 issues on status to block branching...

 There's the general wc-ng performance issue (but I don't think it has
 an issue number attached to it right now).

If performance of 'status' is a release blocker, we need to document
and treat it as such.

Also instead of nebulous handwaving about performance is bad, it
would be nice to have actually datasets and actual numbers.  We have a
VM at the ASF which could be used for hosting a set of benchmarking
data; we just need somebody to put together the data and write the
scripts which run the benchmarks.

[And yes, I realize that I bring up all these points without actually
volunteering to help with the situation.  *sigh* ]

 So if status is still a show-stopper we should focus on that instead
 of trying to improve 'svn proplist -R', which is not a very common
 operation in svn. (Merge works per node, so it doesn't benefit from
 the performance enhancements :( )

 I started working on proplist because it's pretty simple and therefore
 a good way to get going. My end goal is not to improve proplist.
 I want to understand where wc-ng performance problems come from and
 how to fix them.

 I do intend to look at other subcommands once proplist has been dealt with.


 As things are today it looks like status will be released in its current 
 form for 1.7.
 I don't see a problem with the current status performance from my 
 perspective; unless somebody decides to just disable in memory temptables 
 without profiling to fix some other issues in a different place.

 Ah, so status performs well using per-node queries?
 I thought it was still worse compared to 1.6.x for some use cases.
 See http://svn.haxx.se/dev/archive-2010-09/0526.shtml
 I think we should try to get trunk to perform at least as well as 1.6.x
 for all uses cases before release.

 I don't have a problem with choosing file backed temptables over
 memory backed, but I do have an issue with doing it for theoretical
 reasons which are only tested under very specific circumstances. And a
 recursive proplist is not a very common and/or performance critical
 subversion operation from my perspective.

 Of course proplist isn't very critical. But that's not the point.

 The point is that Subversion should be written in a way that prevents
 it from running out of memory when it operates on large working copies.
 What if users run into problems with large working copies with 1.7?
 Have we tested what happens with very large working copies?

 Code that blows up if the input data set is too large is not acceptable.
 Using file-backed temporary tables might be slower, but it's a safe default.

I've been pleading with folks I know who use large working copies to
do some testing, but I haven't gotten any feedback from them, partly
because following up with everybody is kind of a pain, and partly
because there aren't any packaged binaries for people to test.  Would
it be worth it to have a beta-test...@subversion.apache.org list,
which we could use to communicate with the bleeding-edge folks in
release situations like this?

It shouldn't be difficult to create a large working copy ('svn co
^/subversion') and use that for testing, but getting more insight than
just our own dataset would be very nice.

-Hyrum


Re: SQLite and callbacks

2011-02-08 Thread Stefan Sperling
On Tue, Feb 08, 2011 at 05:13:50PM +, Hyrum K Wright wrote:
 On Tue, Feb 8, 2011 at 4:15 PM, Stefan Sperling s...@elego.de wrote:
  On Tue, Feb 08, 2011 at 04:34:52PM +0100, Bert Huijben wrote:
  There is nobody actively working on status and there are no open
  issues on status to block branching...
 
  There's the general wc-ng performance issue (but I don't think it has
  an issue number attached to it right now).
 
 If performance of 'status' is a release blocker, we need to document
 and treat it as such.
 
 Also instead of nebulous handwaving about performance is bad, it
 would be nice to have actually datasets and actual numbers.  We have a
 VM at the ASF which could be used for hosting a set of benchmarking
 data; we just need somebody to put together the data and write the
 scripts which run the benchmarks.

I'll ask Neels about running his set of performance tests on the VM
(see http://svn.haxx.se/dev/archive-2010-09/0526.shtml).

I know he's not following dev@ closely ATM, but maybe he'll see this one
anyway cause his name is in it -- but I can more easily catch his attention
by waving across the yard from my kitchen window :)

 [And yes, I realize that I bring up all these points without actually
 I've been pleading with folks I know who use large working copies to
 do some testing, but I haven't gotten any feedback from them, partly
 because following up with everybody is kind of a pain, and partly
 because there aren't any packaged binaries for people to test.  Would
 it be worth it to have a beta-test...@subversion.apache.org list,
 which we could use to communicate with the bleeding-edge folks in
 release situations like this?
 
 It shouldn't be difficult to create a large working copy ('svn co
 ^/subversion') and use that for testing, but getting more insight than
 just our own dataset would be very nice.

We'll find some way to test with large WCs, no doubt.

In any case, I think that changing our code to use sqlite more
effectively, in the manner suggested by Branko, is the way to go.
The approach would also allow us to address Stefan Küng's requests
in http://svn.haxx.se/dev/archive-2011-02/0104.shtml for APIs that
quickly pull various information from the DB.

Stefan


Re: SQLite and callbacks

2011-02-08 Thread Hyrum K Wright
On Tue, Feb 8, 2011 at 7:27 PM, Stefan Sperling s...@elego.de wrote:
 On Tue, Feb 08, 2011 at 05:13:50PM +, Hyrum K Wright wrote:
 On Tue, Feb 8, 2011 at 4:15 PM, Stefan Sperling s...@elego.de wrote:
  On Tue, Feb 08, 2011 at 04:34:52PM +0100, Bert Huijben wrote:
  There is nobody actively working on status and there are no open
  issues on status to block branching...
 
  There's the general wc-ng performance issue (but I don't think it has
  an issue number attached to it right now).

 If performance of 'status' is a release blocker, we need to document
 and treat it as such.

 Also instead of nebulous handwaving about performance is bad, it
 would be nice to have actually datasets and actual numbers.  We have a
 VM at the ASF which could be used for hosting a set of benchmarking
 data; we just need somebody to put together the data and write the
 scripts which run the benchmarks.

 I'll ask Neels about running his set of performance tests on the VM
 (see http://svn.haxx.se/dev/archive-2010-09/0526.shtml).

 I know he's not following dev@ closely ATM, but maybe he'll see this one
 anyway cause his name is in it -- but I can more easily catch his attention
 by waving across the yard from my kitchen window :)

 [And yes, I realize that I bring up all these points without actually
 I've been pleading with folks I know who use large working copies to
 do some testing, but I haven't gotten any feedback from them, partly
 because following up with everybody is kind of a pain, and partly
 because there aren't any packaged binaries for people to test.  Would
 it be worth it to have a beta-test...@subversion.apache.org list,
 which we could use to communicate with the bleeding-edge folks in
 release situations like this?

 It shouldn't be difficult to create a large working copy ('svn co
 ^/subversion') and use that for testing, but getting more insight than
 just our own dataset would be very nice.

 We'll find some way to test with large WCs, no doubt.

 In any case, I think that changing our code to use sqlite more
 effectively, in the manner suggested by Branko, is the way to go.
 The approach would also allow us to address Stefan Küng's requests
 in http://svn.haxx.se/dev/archive-2011-02/0104.shtml for APIs that
 quickly pull various information from the DB.

All good ideas.

One of my greater concerns is that we don't have a concrete answer to
we'll release when  for the performance question.  What is good
enough?  Which operations?  How much better than 1.6.x?  Having a
concrete answer, and not just when it feels good will give us some
objective criteria, and prevent the one more bugfix delays we've had
in the past.  We're fooling ourselves if we think we're going to nail
all the performance issues before the 1.7.0 release.

The silver lining here is that most performance problems are *not*
going to have compat implications, so they can easily be backported in
future patch releases.

-Hyrum


Re: SQLite and callbacks

2011-02-08 Thread Stefan Sperling
On Tue, Feb 08, 2011 at 07:40:03PM +, Hyrum K Wright wrote:
 One of my greater concerns is that we don't have a concrete answer to
 we'll release when  for the performance question.  What is good
 enough?  Which operations?  How much better than 1.6.x?  Having a
 concrete answer, and not just when it feels good will give us some
 objective criteria, and prevent the one more bugfix delays we've had
 in the past.  We're fooling ourselves if we think we're going to nail
 all the performance issues before the 1.7.0 release.

I would say the minimum is as good as 1.6. And if we're doing this
smartly it's likely that trying to get trunk up to par with 1.6 will
boost performance beyond 1.6.x capabilities anyway.

 The silver lining here is that most performance problems are *not*
 going to have compat implications, so they can easily be backported in
 future patch releases.

Sure, we can always try to make it faster.

What we need to avoid, though, is making the 1.7 release a disappointment
from a performance point of view. If 1.7 is any slower than the, by current
standards, glacial 1.6, it would just be a waste of a release.
It's likely that quite a few of our users would jump ship even if we
promised to follow up with performance improvements in 1.8.

git and hg have less development baggage to carry so they can release
improvements much quicker. And they're already much faster than Subversion 1.6
is in general, even in cases where svn only does local i/o.
Talking to the server over the network will always be slower than talking
to a repository on local disk, but there's no good excuse to be much slower
than alternative systems in other cases...


Re: SQLite and callbacks

2011-02-08 Thread Hyrum K Wright
On Tue, Feb 8, 2011 at 7:59 PM, Stefan Sperling s...@elego.de wrote:
 On Tue, Feb 08, 2011 at 07:40:03PM +, Hyrum K Wright wrote:
 One of my greater concerns is that we don't have a concrete answer to
 we'll release when  for the performance question.  What is good
 enough?  Which operations?  How much better than 1.6.x?  Having a
 concrete answer, and not just when it feels good will give us some
 objective criteria, and prevent the one more bugfix delays we've had
 in the past.  We're fooling ourselves if we think we're going to nail
 all the performance issues before the 1.7.0 release.

 I would say the minimum is as good as 1.6. And if we're doing this
 smartly it's likely that trying to get trunk up to par with 1.6 will
 boost performance beyond 1.6.x capabilities anyway.

 The silver lining here is that most performance problems are *not*
 going to have compat implications, so they can easily be backported in
 future patch releases.

 Sure, we can always try to make it faster.

 What we need to avoid, though, is making the 1.7 release a disappointment
 from a performance point of view. If 1.7 is any slower than the, by current
 standards, glacial 1.6, it would just be a waste of a release.
 It's likely that quite a few of our users would jump ship even if we
 promised to follow up with performance improvements in 1.8.

 git and hg have less development baggage to carry so they can release
 improvements much quicker. And they're already much faster than Subversion 1.6
 is in general, even in cases where svn only does local i/o.
 Talking to the server over the network will always be slower than talking
 to a repository on local disk, but there's no good excuse to be much slower
 than alternative systems in other cases...

Completely agree.  My only point is that whether we say as fast as
1.6 or 10% better than 1.6 or anything else, we need some metric to
measure it, or we're left to handwaving and bikesheds and discussions
like this one. :)  We'll never know when we're finished.  (And
gathering such numbers is something which can be automated.)

Going back to my cave now...

-Hyrum


Re: SQLite and callbacks

2011-02-08 Thread Branko Čibej
On 08.02.2011 18:13, Hyrum K Wright wrote:
 It shouldn't be difficult to create a large working copy ('svn co
 ^/subversion') and use that for testing, but getting more insight than
 just our own dataset would be very nice.

Except that, as I noted elsewhere in this thread, even 'svn co
^/subversion/tags' get the (current trunk) svn process up to 2.5 gigs
before the whole thing falls down, and it takes forever to do that. I'd
say that's another release blocker.

-- Brane


Re: SQLite and callbacks

2011-02-08 Thread Stefan Sperling
On Tue, Feb 08, 2011 at 11:37:16PM +0100, Branko Čibej wrote:
 On 08.02.2011 18:13, Hyrum K Wright wrote:
  It shouldn't be difficult to create a large working copy ('svn co
  ^/subversion') and use that for testing, but getting more insight than
  just our own dataset would be very nice.
 
 Except that, as I noted elsewhere in this thread, even 'svn co
 ^/subversion/tags' get the (current trunk) svn process up to 2.5 gigs
 before the whole thing falls down, and it takes forever to do that. I'd
 say that's another release blocker.

Is this perchance due to the known serf memory leaks which were
fixed in serf 0.7.1?


Re: SQLite and callbacks

2011-02-08 Thread Branko Čibej
On 07.02.2011 21:57, Stefan Sperling wrote:
 On Mon, Feb 07, 2011 at 09:32:48PM +0100, Branko Čibej wrote:
 On 07.02.2011 17:10, Stefan Sperling wrote:
 The bug is probabaly in the following query.
 Maybe the INSERT OR REPLACE doesn't work as intended?
 And why is COMMIT TRANSACTION commented, BTW? Is this the problem?

 -- STMT_REPLACE_ACTUAL_PROPS_IN_CACHE
 INSERT OR REPLACE INTO temp_query_cache.node_props_cache
   (local_relpath, properties)
   SELECT N.local_relpath, N.properties
 FROM actual_node AS N JOIN temp_query_cache.node_props_cache AS C
   ON N.local_relpath = C.local_relpath
 AND  N.wc_id = C.wc_id;
 Yes, this query that handles actual_nodes is bogus. I've been looking at
 fixing it, will update the patch when I find a solution.

 Or if you like, I can just commit it -- but that'd be throwing code over
 my shoulder, as there's little chance that I'd have time to maintain
 such a thing.
 If you prefer, post the patch and I will make sure to understand
 it sufficiently enough to be able to maintain it when it gets committed.

Well, here it is, I fixed the thinko in the actual_props query and got
all prop_tests to pass with this version. Did I say that the way
ACTUAL_NODE is separate from NODE makes these kinds of WC operations
(that merge results from both tables) quite complex and expensive?

There are other differences from the previous patch:

* I use a plain temporary table for the query cache instead of a
  temporary database. It turns out that the temp_store pragma
  affects even nominally file-backed temporary databases, so there's
  no actual difference when using one or the other.
* I introduced a new view to the wc_db schema, called NODES_CURRENT,
  which represents a query on NODES that filters out the duplicates
  by keeping only the version of each (wc_id, local_relpath) with
  the highest op_depth. There are any number of WC operations that
  do essentially the same thing in C code, so using this view in the
  related SQL queries could simplify the code that processes the
  results quite a bit. The addition of this view requires a WC
  version bump, also in the patch.

-- Brane
Index: subversion/libsvn_wc/props.c
===
--- subversion/libsvn_wc/props.c(revision 1068618)
+++ subversion/libsvn_wc/props.c(working copy)
@@ -1711,6 +1711,7 @@
   SVN_ERR(svn_wc__db_read_props_of_immediates(b-db, local_abspath,
   b-receiver_func,
   b-receiver_baton,
+  NULL, NULL,
   scratch_pool));
   return SVN_NO_ERROR;
 }
@@ -1725,47 +1726,41 @@
 void *cancel_baton,
 apr_pool_t *scratch_pool)
 {
-  struct read_dir_props_baton read_dir_baton;
-
-  if (depth = svn_depth_immediates)
+  switch (depth)
 {
-  apr_hash_t *props;
+case svn_depth_empty:
+  {
+apr_hash_t *props;
 
-  SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
-scratch_pool, scratch_pool));
-  if (receiver_func  props  apr_hash_count(props)  0)
-SVN_ERR((*receiver_func)(receiver_baton, local_abspath, props,
- scratch_pool));
-  if (depth == svn_depth_empty)
-return SVN_NO_ERROR;
-}
-
-  if (depth == svn_depth_files)
-{
+SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
+  scratch_pool, scratch_pool));
+if (receiver_func  props  apr_hash_count(props)  0)
+  SVN_ERR((*receiver_func)(receiver_baton, local_abspath, props,
+   scratch_pool));
+  }
+  break;
+case  svn_depth_files:
   SVN_ERR(svn_wc__db_read_props_of_files(wc_ctx-db, local_abspath,
  receiver_func, receiver_baton,
+ cancel_func, cancel_baton,
  scratch_pool));
-  return SVN_NO_ERROR;
-}
-
-  if (depth == svn_depth_immediates)
-{
+  break;
+case svn_depth_immediates:
   SVN_ERR(svn_wc__db_read_props_of_immediates(wc_ctx-db, local_abspath,
-  receiver_func,
-  receiver_baton,
+  receiver_func, 
receiver_baton,
+  cancel_func, cancel_baton,
   scratch_pool));
-  return SVN_NO_ERROR;
+  break;
+case svn_depth_infinity:
+  SVN_ERR(svn_wc__db_read_props_recursive(wc_ctx-db, local_abspath,
+  receiver_func, 

Re: SQLite and callbacks

2011-02-08 Thread Branko Čibej
On 08.02.2011 23:47, Stefan Sperling wrote:
 On Tue, Feb 08, 2011 at 11:37:16PM +0100, Branko Čibej wrote:
 On 08.02.2011 18:13, Hyrum K Wright wrote:
 It shouldn't be difficult to create a large working copy ('svn co
 ^/subversion') and use that for testing, but getting more insight than
 just our own dataset would be very nice.
 Except that, as I noted elsewhere in this thread, even 'svn co
 ^/subversion/tags' get the (current trunk) svn process up to 2.5 gigs
 before the whole thing falls down, and it takes forever to do that. I'd
 say that's another release blocker.
 Is this perchance due to the known serf memory leaks which were
 fixed in serf 0.7.1?

Well, I just now updated my trunk working copy and had to go install a
newer serf to get it to build (and incidentally ran afoul of the diff
bug mentioned elsewhere, it truncated a couple files, most unpleasant).

I'll try a smaller checkout (just tags/ebcdic, which also failed by
itself before) and report.

-- Brane


Re: SQLite and callbacks

2011-02-08 Thread Branko Čibej
On 08.02.2011 23:57, Branko Čibej wrote:
 On 08.02.2011 23:47, Stefan Sperling wrote:
 On Tue, Feb 08, 2011 at 11:37:16PM +0100, Branko Čibej wrote:
 On 08.02.2011 18:13, Hyrum K Wright wrote:
 It shouldn't be difficult to create a large working copy ('svn co
 ^/subversion') and use that for testing, but getting more insight than
 just our own dataset would be very nice.
 Except that, as I noted elsewhere in this thread, even 'svn co
 ^/subversion/tags' get the (current trunk) svn process up to 2.5 gigs
 before the whole thing falls down, and it takes forever to do that. I'd
 say that's another release blocker.
 Is this perchance due to the known serf memory leaks which were
 fixed in serf 0.7.1?
 Well, I just now updated my trunk working copy and had to go install a
 newer serf to get it to build (and incidentally ran afoul of the diff
 bug mentioned elsewhere, it truncated a couple files, most unpleasant).

 I'll try a smaller checkout (just tags/ebcdic, which also failed by
 itself before) and report.

Using Serf 0.7.1 definitely helps, the checkout of
^/subversion/tags/ebcdic was now successful, took 19 minutes and got svn
memory usage up to 360MB. I'm not too happy about the last two points,
but at least it works.

-- Brane


Re: SQLite and callbacks

2011-02-08 Thread Branko Čibej
On 08.02.2011 23:50, Branko Čibej wrote:
 Well, here it is, I fixed the thinko in the actual_props query and got
 all prop_tests to pass with this version. Did I say that the way
 ACTUAL_NODE is separate from NODE makes these kinds of WC operations
 (that merge results from both tables) quite complex and expensive?

By the way, there's a simpler query that'll return the same result, but
it uses an outer join instead of an inner join and takes forever -- it'd
be twice as slow as the plain-vanilla trunk is right now.

I suspect that if instead we materialized that outer join by simply
merging NODES and ACTUAL_NODE into one table, denoting the ACTUAL_ bit
with a new (higher) op_depth value, then my patch could have dropped a
join and a whole query, making the proplist about twice as fast as the
patch currently makes it. I'm sure a number of other queries could be
improved that way, too. It would mean dropping some NOT NULL
constraints, and would make the NODES table a bit larger; but
performance-wise, having the whole shebang in a single table and
avoiding outer joins (either programmatic or in queries) would open the
door for further performance improvements and code simplification.

-- Brane



Re: SQLite and callbacks

2011-02-08 Thread Philip Martin
Branko Čibej br...@e-reka.si writes:

 Using Serf 0.7.1 definitely helps, the checkout of
 ^/subversion/tags/ebcdic was now successful, took 19 minutes and got svn
 memory usage up to 360MB. I'm not too happy about the last two points,
 but at least it works.

I checked out ^/subversion/branches using HEAD over ra_neon on Linux.
Memory use as reported by top started at about 120MB and grew at about
1MB per branch reaching about 180MB after all 52 branches.

This is one case where our central pristine store saves disk space due
to duplicate files:

$ du -hs wcng
2.3Gwcng
$ du -hs wcng/.svn
574Mwcng/.svn
$ du -hs wcng/.svn/wc.db 
68M wcng/.svn/wc.db
$ sqlite3 wcng/.svn/wc.db select count(*) from nodes
87260
$ time svn st wcng

real0m3.983s
user0m2.544s
sys 0m1.412s

The memory grows much faster using ra_serf/serf-0.7.1, about 20MB per
branch.  It's harder to monitor because the order is less predictable.
By the time I had 35 of 52 branches present memory had grown to 750MB,
however for the remaining branches it didn't grow any further.  I don't
understand it, but something appeared to cap the memory use.

-- 
Philip


Re: SQLite and callbacks

2011-02-08 Thread Neels Hofmeyr
On Tue, 2011-02-08 at 20:27 +0100, Stefan Sperling wrote:
  Also instead of nebulous handwaving about performance is bad, it
  would be nice to have actually datasets and actual numbers.  We have a
  VM at the ASF which could be used for hosting a set of benchmarking
  data; we just need somebody to put together the data and write the
  scripts which run the benchmarks.
 
 I'll ask Neels about running his set of performance tests on the VM
 (see http://svn.haxx.se/dev/archive-2010-09/0526.shtml).

Wow, looking back at my own results, it seems so far away. Man, it's
slowly getting half a year since I first ran the benchmark.

If you have instructions for me to set up my humble tests on some
machine, I might be tempted to cook up some digest reporting on top of
it. The test script itself might benefit from a review, though.

If we run it on some idling box somewhere, we could simply increase the
extent of the test from 4x4 dir levels to a few more (keeping an eye on
exponential growth so it takes less than a day to run...).

I'm not sure if it's necessary to even care about other runs than
ra_local, since we're testing libsvn_wc performance, right? ra_local
should be where libsvn_wc perf loss is the most visible.

I still have that article about svn 1.7 waiting unreleased and growing a
beard. I don't want to print an article that has to say svn 1.7 grew
slower than 1.6, so I'd want 1.7 to be same speed as 1.6 for a release.
We can already brag about massive memory usage improvement with deep
WCs. If we can just skip having to say BUT it's slower in many common
cases, that'd be great.

 
 I know he's not following dev@ closely ATM, but maybe he'll see this one
 anyway cause his name is in it -- but I can more easily catch his attention
 by waving across the yard from my kitchen window :)

(monospace)
[[[
|   *  |
| .|   __
|  |   |   ~|
|  |   |  ~ |  o/
__  |  |   || /|  _o/
| ~  |  |  |  / \ /_\  (-- ascii daughter)
\o  |  ~ |  |  |
 |\ ||  |  |
/ \ |  |
|


]]]

:)
And all the neighbors left bewildered.

~Neels




Re: SQLite and callbacks

2011-02-07 Thread Branko Čibej
On 07.02.2011 02:52, Branko Čibej wrote:
 Note that I didn't run the tests with this patch, so I'm not claiming it
 to be bug-free. In fact there's one bug in the recursive proplist of the
 WC root, where the root props show up twice -- there's an issue with the
 filter in the query there. But it's proof-of-concept after all.

Well, I fixed that bug and the new patch is attached. I ran
prop_tests.py, and did get one failure (see below); could be a
difference in the output (the results of a recursive proplist are
ordered now), but I don't plan to investigate that; whoever takes this
patch and runs with it is welcome to do so. :)

-- Brane

Error: expected keywords:  ['new-add', 'new-keep', 'p', 'p', 'Properties on ', 
'Properties on ']
   actual full output: ['new-add\n', 'new-keep\n', '
old-keep\n', '  p\n', '  p\n', '  p\n', Properties on 
'svn-test-work/working_copies/prop_tests-15/A/added':\n, Properties on 
'svn-test-work/working_copies/prop_tests-15/iota':\n, Properties on 
'svn-test-work/working_copies/prop_tests-15/iota':\n]
Traceback (most recent call last):
  File 
/Users/brane/src/svn/repos/trunk/subversion/tests/cmdline/svntest/main.py, 
line 1222, in run
rc = self.pred.run(sandbox)
  File 
/Users/brane/src/svn/repos/trunk/subversion/tests/cmdline/svntest/testcase.py,
 line 182, in run
return self.func(sandbox)
  File ./prop_tests.py, line 1093, in recursive_base_wc_ops
output, errput)
  File ./prop_tests.py, line 1053, in verify_output
raise svntest.Failure
Failure
FAIL:  prop_tests.py 15: recursive property operations in BASE and WC


Index: subversion/libsvn_wc/props.c
===
--- subversion/libsvn_wc/props.c(revision 1067878)
+++ subversion/libsvn_wc/props.c(working copy)
@@ -1711,6 +1711,7 @@
   SVN_ERR(svn_wc__db_read_props_of_immediates(b-db, local_abspath,
   b-receiver_func,
   b-receiver_baton,
+  NULL, NULL,
   scratch_pool));
   return SVN_NO_ERROR;
 }
@@ -1725,9 +1726,12 @@
 void *cancel_baton,
 apr_pool_t *scratch_pool)
 {
-  struct read_dir_props_baton read_dir_baton;
+  svn_boolean_t is_root;
 
-  if (depth = svn_depth_immediates)
+  SVN_ERR(svn_wc__db_is_wcroot(is_root, wc_ctx-db, local_abspath,
+   scratch_pool));
+
+  if (depth = svn_depth_immediates || !is_root)
 {
   apr_hash_t *props;
 
@@ -1744,28 +1748,28 @@
 {
   SVN_ERR(svn_wc__db_read_props_of_files(wc_ctx-db, local_abspath,
  receiver_func, receiver_baton,
+ cancel_func, cancel_baton,
  scratch_pool));
-  return SVN_NO_ERROR;
 }
-
-  if (depth == svn_depth_immediates)
+  else if (depth == svn_depth_immediates)
 {
   SVN_ERR(svn_wc__db_read_props_of_immediates(wc_ctx-db, local_abspath,
-  receiver_func,
-  receiver_baton,
+  receiver_func, 
receiver_baton,
+  cancel_func, cancel_baton,
   scratch_pool));
-  return SVN_NO_ERROR;
 }
+  else if (depth == svn_depth_infinity)
+{
+  SVN_ERR(svn_wc__db_read_props_recursive(wc_ctx-db, local_abspath,
+  receiver_func, receiver_baton,
+  cancel_func, cancel_baton,
+  scratch_pool));
+}
+  else
+{
+  SVN_ERR_MALFUNCTION();
+}
 
-  read_dir_baton.db = wc_ctx-db;
-  read_dir_baton.root_abspath = local_abspath;
-  read_dir_baton.receiver_func = receiver_func;
-  read_dir_baton.receiver_baton = receiver_baton;
-
-  SVN_ERR(svn_wc__internal_walk_children(wc_ctx-db, local_abspath, FALSE,
- read_dir_props, read_dir_baton,
- depth, cancel_func, cancel_baton,
- scratch_pool));
   return SVN_NO_ERROR;
 }
 
Index: subversion/libsvn_wc/wc-queries.sql
===
--- subversion/libsvn_wc/wc-queries.sql (revision 1067878)
+++ subversion/libsvn_wc/wc-queries.sql (working copy)
@@ -726,7 +726,69 @@
 UPDATE nodes SET checksum = ?4
 WHERE wc_id = ?1 AND local_relpath = ?2 AND op_depth = ?3;
 
+/* - */
+/* PROOF OF CONCEPT: Complex queries for callback walks, using file-backed
+ *   temporary database. */
 
+-- 

Re: SQLite and callbacks

2011-02-07 Thread Stefan Sperling
On Mon, Feb 07, 2011 at 12:43:41PM +0100, Stefan Sperling wrote:
 On Mon, Feb 07, 2011 at 10:30:15AM +0100, Branko Čibej wrote:
  On 07.02.2011 02:52, Branko Čibej wrote:
   Note that I didn't run the tests with this patch, so I'm not claiming it
   to be bug-free. In fact there's one bug in the recursive proplist of the
   WC root, where the root props show up twice -- there's an issue with the
   filter in the query there. But it's proof-of-concept after all.
  
  Well, I fixed that bug and the new patch is attached. I ran
  prop_tests.py, and did get one failure (see below); could be a
  difference in the output (the results of a recursive proplist are
  ordered now), but I don't plan to investigate that; whoever takes this
  patch and runs with it is welcome to do so. :)
 
 Thanks a lot! I'll take a look.

I like this patch a lot and think we should commit it after fixing
the test failure. It seems like temporary tables are the answer to
this problem. I expected this to be too slow, but it's very fast.

Where is the temporary table stored? Is it back by a file or memory?
If backed by memory, do we have to worry about memory consumption for
large working copies?


Re: SQLite and callbacks

2011-02-07 Thread Stefan Sperling
On Mon, Feb 07, 2011 at 04:52:30PM +0100, Stefan Sperling wrote:
 I like this patch a lot and think we should commit it after fixing
 the test failure.

The test is failing because your code lists both the BASE properties
and the ACTUAL properties for a node which had its props changed:

sqlite select local_relpath, properties, op_depth from nodes where 
local_relpath = iota;
iota|(p old-keep)|0
sqlite select local_relpath, properties from actual_node where local_relpath = 
iota;
iota|(p new-keep)

The test expects only new-keep to be listed, but both are listed.
old-keep should not appear in the output:

CMD: svn proplist -R -v svn-test-work/working_copies/prop_tests-15 --config-dir 
/home/stsp/svn/svn-trunk/subversion/tests/cmdline/svn-test-work/local_tmp/config
 --password rayjandom --no-auth-cache --username jrandom
TIME = 0.252734
Properties on 'svn-test-work/working_copies/prop_tests-15/A/added':
  p
new-add
Properties on 'svn-test-work/working_copies/prop_tests-15/iota':
  p
old-keep
Properties on 'svn-test-work/working_copies/prop_tests-15/iota':
  p
new-keep
Error: expected keywords:  ['new-add', 'new-keep', 'p', 'p', 'Properties on ', '
Properties on ']
   actual full output: ['new-add\n', 'new-keep\n', 'old-keep\n',
 '  p\n', '  p\n', '  p\n', Properties on 'svn-test-work/working_copies/prop_te
sts-15/A/added':\n, Properties on 'svn-test-work/working_copies/prop_tests-15/
iota':\n, Properties on 'svn-test-work/working_copies/prop_tests-15/iota':\n]
Traceback (most recent call last):

The bug is probabaly in the following query.
Maybe the INSERT OR REPLACE doesn't work as intended?
And why is COMMIT TRANSACTION commented, BTW? Is this the problem?

-- STMT_REPLACE_ACTUAL_PROPS_IN_CACHE
INSERT OR REPLACE INTO temp_query_cache.node_props_cache
  (local_relpath, properties)
  SELECT N.local_relpath, N.properties
FROM actual_node AS N JOIN temp_query_cache.node_props_cache AS C
  ON N.local_relpath = C.local_relpath
AND  N.wc_id = C.wc_id;
/*COMMIT TRANSACTION;*/



Re: SQLite and callbacks

2011-02-07 Thread Stefan Sperling
On Mon, Feb 07, 2011 at 10:55:47AM -0500, Mark Phippard wrote:
 On Mon, Feb 7, 2011 at 10:52 AM, Stefan Sperling s...@elego.de wrote:
 
  Where is the temporary table stored? Is it back by a file or memory?
  If backed by memory, do we have to worry about memory consumption for
  large working copies?
 
 The patch says it is backed by a file.

Yes, it does say that in a comment but I didn't see where this is being
enforced in the code.
Checking the sqlite docs gave the answer:

  When the name of the database file handed to sqlite3_open() or to ATTACH
   is an empty string, then a new temporary file is created to hold the
   database.
http://sqlite.org/inmemorydb.html

This is what the patch does:

+-- STMT_ATTACH_TEMPORARY_DATABASE
+ATTACH DATABASE '' AS temp_query_cache;

And note that the sqlite docs also say:

  Even though a disk file is allocated for each temporary database, in
   practice the temporary database usually resides in the in-memory pager
   cache and hence is very little difference between a pure in-memory
   database created by :memory: and a temporary database created by an
   empty filename. The sole difference is that a :memory: database must
   remain in memory at all times whereas parts of a temporary database
   might be flushed to disk if database becomes large or if SQLite comes
   under memory pressure.

Neat :)


Re: SQLite and callbacks

2011-02-07 Thread Stefan Sperling
On Mon, Feb 07, 2011 at 05:18:57PM +0100, Stefan Sperling wrote:
 On Mon, Feb 07, 2011 at 10:55:47AM -0500, Mark Phippard wrote:
  On Mon, Feb 7, 2011 at 10:52 AM, Stefan Sperling s...@elego.de wrote:
  
   Where is the temporary table stored? Is it back by a file or memory?
   If backed by memory, do we have to worry about memory consumption for
   large working copies?
  
  The patch says it is backed by a file.
 
 Yes, it does say that in a comment but I didn't see where this is being
 enforced in the code.
 Checking the sqlite docs gave the answer:
 
   When the name of the database file handed to sqlite3_open() or to ATTACH
is an empty string, then a new temporary file is created to hold the
database.
 http://sqlite.org/inmemorydb.html
 
 This is what the patch does:
 
 +-- STMT_ATTACH_TEMPORARY_DATABASE
 +ATTACH DATABASE '' AS temp_query_cache;
 
 And note that the sqlite docs also say:
 
   Even though a disk file is allocated for each temporary database, in
practice the temporary database usually resides in the in-memory pager
cache and hence is very little difference between a pure in-memory
database created by :memory: and a temporary database created by an
empty filename. The sole difference is that a :memory: database must
remain in memory at all times whereas parts of a temporary database
might be flushed to disk if database becomes large or if SQLite comes
under memory pressure.
 
 Neat :)

Actually, we are forcing all temporary databases to be pure memory
in svn_sqlite__open():

  /* Store temporary tables in RAM instead of in temporary files, but don't
 fail on this if this option is disabled in the sqlite compilation by
 setting SQLITE_TEMP_STORE to 0 (always to disk) */
  svn_error_clear(exec_sql(*db, PRAGMA temp_store = MEMORY;));

Is this really a good idea? I think we should set it to FILE by default.
http://sqlite.org/pragma.html#pragma_temp_store


Re: SQLite and callbacks

2011-02-07 Thread Stefan Sperling
On Mon, Feb 07, 2011 at 09:23:23PM +0100, Johan Corveleyn wrote:
 I've been wondering about the question how about storing/buffering
 the entire query results in memory? Would this really be a problem,
 even for very large working copies?
 
 I have a quite large working copy checked out here (trunk of an old
 backup of our work repository): 68,806 files, 9,868 folders (not
 counting .svn area of course). Ok, it's not huge, there are certainly
 much larger ones out there, but it's not small either. For comparison:
 svn trunk is only 1,818 files, 227 folders.
 
 I just note that wc.db of this large wc is only 62 MB (the one from
 svn trunk is only 1.6 MB). And this is a working copy with mostly
 .java files, all with 3 properties set (svn:eol-style, svn:keywords
 and cvs2svn:cvs-rev (yeah, this was an old migration experiment, we
 dropped this one when we did the final migration)).
 
 So I'm thinking that any query results will take a maximum of 62 MB of
 memory, and usually a lot less (most queries won't be reading the
 entire db). Or is this too naive?

The sqlite temp_store pragma is a global setting.
It not only applies to proplist, but also to other queries using
temporary tables.

We're trying to devise a general approach to better using wc.db
based on the proplist example. Other commands may need a similar
approach. So we can't just look at proplist to make a decision.
How much data will svn status pull into memory? How much data will
future APIs that we haven't even designed yet pull into memory?

 The above solution with SQLite temp tables seems like a good approach.
 And it would be great if the file-backed temp tables would be
 almost as fast as the in-memory temp tables (with the PRAGMA
 setting). But if the file-backed temp tables would be significantly
 slower, I would consider taking the in-memory route.

I think we should use the default (PRAGMA temp_store = FILE) unless
we encounter a serious problem with it. I believe that this was changed
to MEMORY because that helps with the ridiculous amount of queries we're
doing at the moment. But with the handful of queries we're issuing with
Branko's patch, and the amount of data pulled into a temporary table,
we're more likely to hit the problem where temporary tables don't fit
into memory. When that happens, we fail if temp_store is MEMORY,
and we get a bit slower if temp_store is FILE. Getting a bit slower
is better than failing.

I also think that we shouldn't be changing sqlite default settings unless
there is a very good reason. We're (mostly?) not database developers, so
we're probably better off relying on defaults chosen by sqlite devs.

 Or, even better: make it configurable for the user (client-side config
 file, or something wc-specific (?)), so he can make the choice between
 memory and speed for himself.

That would be interesting to add, and would allow our users to help
us find out whether we might want to flip the default to MEMORY in
the future (e.g. for Subversion 1.8).

 A lot of wc databases out there will be
 so small that the user will hardly notice the memory increase.

All we'd be doing is allowing sqlite to flush data to disk if needed.
Even with a temporary table backed by a file, most operations happen in
memory. Either in buffers managed by sqlite or the operating system's
buffer cache (until sqlite does an fsync). So for small databases it
shouldn't make a difference.


Re: SQLite and callbacks

2011-02-07 Thread Stefan Sperling
On Mon, Feb 07, 2011 at 09:32:48PM +0100, Branko Čibej wrote:
 On 07.02.2011 17:10, Stefan Sperling wrote:
  The bug is probabaly in the following query.
  Maybe the INSERT OR REPLACE doesn't work as intended?
  And why is COMMIT TRANSACTION commented, BTW? Is this the problem?
 
  -- STMT_REPLACE_ACTUAL_PROPS_IN_CACHE
  INSERT OR REPLACE INTO temp_query_cache.node_props_cache
(local_relpath, properties)
SELECT N.local_relpath, N.properties
  FROM actual_node AS N JOIN temp_query_cache.node_props_cache AS C
ON N.local_relpath = C.local_relpath
  AND  N.wc_id = C.wc_id;
 
 Yes, this query that handles actual_nodes is bogus. I've been looking at
 fixing it, will update the patch when I find a solution.
 
 Or if you like, I can just commit it -- but that'd be throwing code over
 my shoulder, as there's little chance that I'd have time to maintain
 such a thing.

If you prefer, post the patch and I will make sure to understand
it sufficiently enough to be able to maintain it when it gets committed.


Re: SQLite and callbacks

2011-02-07 Thread Branko Čibej
On 07.02.2011 21:51, Stefan Sperling wrote:
 A lot of wc databases out there will be
 so small that the user will hardly notice the memory increase.
 All we'd be doing is allowing sqlite to flush data to disk if needed.
 Even with a temporary table backed by a file, most operations happen in
 memory. Either in buffers managed by sqlite or the operating system's
 buffer cache (until sqlite does an fsync). So for small databases it
 shouldn't make a difference.

Indeed, some data points for the patched recursive proplist case,
in-memory vs. file-backed temporary storage:

* subversion trunk working copy (2k nodes): file-backed is 5% slower
* subversion tags tree (200k nodes): file-backed is 10% slower, svn
  proces uses up to 80M of memory.

These numbers are not really significant given the 10x to infinityx
speedup compared to the current code. But I'd suggest that once /all/
the queries are optimized in this way, temporary storage should be made
file-backed by default as that makes horrible memory consumption less
likely.

-- Brane


Re: SQLite and callbacks

2011-02-06 Thread Branko Čibej
On 05.02.2011 21:55, Johan Corveleyn wrote:
 On Sat, Feb 5, 2011 at 9:37 PM, Johan Corveleyn jcor...@gmail.com wrote:
 On Sat, Feb 5, 2011 at 7:14 PM, Stefan Sperling s...@elego.de wrote:
 On Sat, Feb 05, 2011 at 06:47:35PM +0100, Branko Čibej wrote:
 I would not worry about existing clients -- simply mark the existing
 APIs as deprecated, but keep them and do not attempt to improve their
 performance.
 Neglecting performance of backwards compat code is an interesting idea.
 It allows us to focus on the new APIs first and foremost.

 The existing APIs will continue to work using the node walker and issue
 queries per node as they do now. We could consider optimising them later,
 before or after 1.7 release. The required changes are mostly mechanical.
 I agree with most of what's been said here. I think it would be a pity
 to use WC-NG in a way that provides far from optimal performance.

 FWIW, I just did a quick run of your per-directory proplist query vs.
 the per-node version, on my Windows XP platform, to have another data
 point. The performance improvement is significant, but not
 earth-shattering (around 20%).

 Just tested with a freshly checked out working copy of svn trunk:

 1) Per-node queries (r1066540). Looking at the third run, to make sure
 everything is hot in cache:

 $ time svn proplist -R . /dev/null

 real0m1.532s
 user0m0.015s
 sys 0m0.015s


 2) Per-dir queries (r1066541). Looking at the third run, to make sure
 everything is hot in cache:

 $ time svn proplist -R . /dev/null

 real0m1.218s
 user0m0.015s
 sys 0m0.031s


 3) For comparison, I also tested with SlikSVN 1.6.13. This is still
 more than twice as fast:

 $ time svn proplist -R . /dev/null

 real0m0.578s
 user0m0.015s
 sys 0m0.046s
 I should've added one more test, to put things in perspective :-),
 namely the test with r1039808 (the per-wc query version):

 $ time svn proplist -R . /dev/null

 real0m0.328s
 user0m0.015s
 sys 0m0.031s

 (but, as you probably already know, this execution is not cancellable,
 which is pretty annoying :-), especially when I forget to redirect to
 /dev/null, but let it write on the console).

 Cheers,

Clearly the full-wc query is the best solution, and it's not really hard
to avoid the possible deadlocks associated with running callbacks from
within a transaction. And even the best current queries are doing way
too much in code instead of using the power of SQL to speed thing up.

As a proof of concept, I took the recursive proplist function, then
changed it it issue the callbacks outside sqlite transactions and made
the thing cancelable, by using a file-based temporary database to cache
the query results, and offloading most of the heavy lifting to sqlite.

Here's my resulton a fresh trunk working copy:

$ time svn proplist -R . /dev/null

real0m0.066s
user0m0.047s
sys0m0.014s

And here's current trunk without modifications, on the same working copy:
$ time svn proplist -R . /dev/null

real0m0.586s
user0m0.549s
sys0m0.029s


Just to put things in perspective, I ran both tests on a full checkout
of our tags directory:

With the patch:

$ time svn proplist -R . /dev/null

real0m4.926s
user0m4.178s
sys0m0.679s

Without the patch, I canceled the command after it had been running for
an hour, with CPU usage at more than 95%. So there's clearly quite a bit
of room for improving performance of the working copy.

Note that I didn't run the tests with this patch, so I'm not claiming it
to be bug-free. In fact there's one bug in the recursive proplist of the
WC root, where the root props show up twice -- there's an issue with the
filter in the query there. But it's proof-of-concept after all.

-- Brane

P.S.: By the way, checking out tags got the current trunk svn client
memory usage up to 2.5 gigs before it gave up and hung, and the WC was
borked, and I had to do the checkout in pieces, separately for each tag.
That's less than acceptable.

Index: subversion/libsvn_wc/props.c
===
--- subversion/libsvn_wc/props.c(revision 1067790)
+++ subversion/libsvn_wc/props.c(working copy)
@@ -1711,6 +1711,7 @@
   SVN_ERR(svn_wc__db_read_props_of_immediates(b-db, local_abspath,
   b-receiver_func,
   b-receiver_baton,
+  NULL, NULL,
   scratch_pool));
   return SVN_NO_ERROR;
 }
@@ -1725,47 +1726,42 @@
 void *cancel_baton,
 apr_pool_t *scratch_pool)
 {
-  struct read_dir_props_baton read_dir_baton;
+  apr_hash_t *props;
 
-  if (depth = svn_depth_immediates)
-{
-  apr_hash_t *props;
+  SVN_ERR(svn_wc__db_read_props(props, wc_ctx-db, local_abspath,
+scratch_pool, scratch_pool));
+  if 

Re: SQLite and callbacks

2011-02-05 Thread Stefan Sperling
On Fri, Feb 04, 2011 at 03:29:37AM +0200, Daniel Shahaf wrote:
 (test script attached --- hope it goes through)
 
 Stefan Sperling wrote on Wed, Feb 02, 2011 at 18:53:39 +0100:
  I've made svn proplist issue per-directory queries in r1066541.
  Reviews of this change are most welcome.
  Also, if someone has a nice way of testing performance impacts of
  this change it would be interesting to see the results.
  I have not done any benchmarking myself yet.
  
 
 Working copy of /tags/1.6.*, no local mods, r1066541:
 [[[
 == pristine-t1-1.out ==
 233.55user 78.55system 5:12.08elapsed 100%CPU (0avgtext+0avgdata 
 32176maxresident)k
 0inputs+0outputs (0major+6199minor)pagefaults 0swaps
 
 == pristine-t1-2.out ==
 237.68user 79.22system 5:16.88elapsed 100%CPU (0avgtext+0avgdata 
 32176maxresident)k
 0inputs+0outputs (0major+6200minor)pagefaults 0swaps
 
 == pristine-t1-3.out ==
 232.96user 76.04system 5:08.98elapsed 100%CPU (0avgtext+0avgdata 
 32176maxresident)k
 0inputs+0outputs (0major+6197minor)pagefaults 0swaps
 ]]]
 
 Ditto, r1066540:
 [[[
 == pristine-t2-1.out ==
 253.41user 82.90system 5:36.27elapsed 100%CPU (0avgtext+0avgdata 
 30480maxresident)k
 0inputs+0outputs (0major+6099minor)pagefaults 0swaps
 
 == pristine-t2-2.out ==
 252.31user 82.52system 5:34.81elapsed 100%CPU (0avgtext+0avgdata 
 30480maxresident)k
 0inputs+0outputs (0major+6090minor)pagefaults 0swaps
 
 == pristine-t2-3.out ==
 234.95user 75.25system 5:10.15elapsed 100%CPU (0avgtext+0avgdata 
 30480maxresident)k
 0inputs+0outputs (0major+6092minor)pagefaults 0swaps
 ]]]
 
 So, 5% more memory, but (excluding the t2-3 result) post-change is 7%
 faster than post-change.

Is this difference really significant?


 Ditto, with local prop mods on all *.c/*.py files:
 [[[
 == pset-Rpyc-t1-1.out ==
 219.41user 67.76system 4:47.13elapsed 100%CPU (0avgtext+0avgdata 
 32192maxresident)k
 0inputs+0outputs (0major+6200minor)pagefaults 0swaps
 
 == pset-Rpyc-t1-2.out ==
 218.48user 65.74system 4:44.18elapsed 100%CPU (0avgtext+0avgdata 
 32192maxresident)k
 0inputs+0outputs (0major+6205minor)pagefaults 0swaps
 
 == pset-Rpyc-t1-3.out ==
 219.14user 67.83system 4:46.94elapsed 100%CPU (0avgtext+0avgdata 
 32192maxresident)k
 0inputs+0outputs (0major+6197minor)pagefaults 0swaps
 ]]]
 
 Ditto, r1066540:
 [[[
 == pset-Rpyc-t2-1.out ==
 217.34user 64.90system 4:42.20elapsed 100%CPU (0avgtext+0avgdata 
 30480maxresident)k
 0inputs+0outputs (0major+6097minor)pagefaults 0swaps
 
 == pset-Rpyc-t2-2.out ==
 215.01user 67.10system 4:42.08elapsed 100%CPU (0avgtext+0avgdata 
 30464maxresident)k
 0inputs+0outputs (0major+6097minor)pagefaults 0swaps
 
 == pset-Rpyc-t2-3.out ==
 212.82user 63.68system 4:36.46elapsed 100%CPU (0avgtext+0avgdata 
 30480maxresident)k
 0inputs+0outputs (0major+6094minor)pagefaults 0swaps
 ]]]
 
 Pre-change is faster.

I'll look at profiler output. It has much better granularity.
It's possible that we still run queries elsewhere that affect the results.
We need to know whether the runtime overhead of sqlite queries has decreased.

Of course, with some usage patterns (e.g. working copies which contain only
directories, or disproportionally more directories than files),
we can expect little to no difference.

Stefan


Re: SQLite and callbacks

2011-02-05 Thread Stefan Sperling
On Sat, Feb 05, 2011 at 01:11:28PM +0100, Stefan Sperling wrote:
 I'll look at profiler output. It has much better granularity.

I took 3 profiled runs of svn pl -v -R on an svn-trunk working copy
with and without the patch.
The results were pretty much the same for all runs.
The numbers below are from the second run.

The change has reduced the number of queries we issue,
but we are still issuing way too many queries for the impact to
be significant.

Before:
  called/total   parents 
index  %timeself descendents  called+selfname   index
  called/total   children

0.141.038467/8467sqlite3Step [8]
[10]55.20.141.038467 sqlite3VdbeExec [10]
 [children of
  sqlite3VdbeExec omitted]


  %   cumulative   self  self total   
 time   seconds   secondscalls  ms/call  ms/call  name
 22.7   0.48 0.48 _mcount [19]
 12.3   0.74 0.26 __mcount [25]
 10.4   0.96 0.22   478996 0.00 0.00  
sqlite3BtreeMovetoUnpacked [18]
  6.6   1.10 0.14 8467 0.02 0.14  sqlite3VdbeExec [10]


After:
  called/total   parents 
index  %timeself descendents  called+selfname   index
  called/total   children

0.171.074832/4832sqlite3Step [8]
[9] 64.40.171.074832 sqlite3VdbeExec [9]
[again, children o
 sqlite3VdbeExec omitted]


  %   cumulative   self  self total   
 time   seconds   secondscalls  ms/call  ms/call  name
 21.8   0.42 0.42 _mcount [18]
 13.5   0.68 0.26   473315 0.00 0.00  
sqlite3BtreeMovetoUnpacked [16]
  8.8   0.85 0.17 4832 0.04 0.26  sqlite3VdbeExec [9]
  8.8   1.02 0.17 __mcount [20]


The number of sqlite3VdbeExec() invocations has been reduced from 8467 to 4832.
But 4832 queries is still way too much when compared to not crawling
the tree at all. I don't have related gprof data anymore, but the log
message of r1039808 contains some hints:

 gprof says this change speeds up svn proplist -R svn-trunk-wc /dev/null
 on my box from about 0.35s to about 0.11s. During profiled runs, the function
 sqlite3Step() went from using 27% of the total time down to 15%. The time spent
 within and below svn_client_proplist3() went down from 51% to 36%.
 The profiler overhead accounted for about 31% of the total run time
 before this change and about 45% after.

So, as expected, r1039808 had a much greater effect than r1066541.
Note that for our purposes sqlite3Step() is equivalent to sqlite3VdbeExec().
_mcount in the gprof output above shows the profiler overhead.

I think we should strongly consider revving affected callbacks in the
1.7 API and document restriction they have to heed. Then we can bring
the r1039808 code back. We can keep the code added in r1066541 for
backwards-compatibility if desired.

Stefan


Re: SQLite and callbacks

2011-02-05 Thread Stefan Sperling
On Sat, Feb 05, 2011 at 03:29:23PM +0100, Stefan Sperling wrote:
 I think we should strongly consider revving affected callbacks in the
 1.7 API and document restriction they have to heed. Then we can bring
 the r1039808 code back. We can keep the code added in r1066541 for
 backwards-compatibility if desired.

By the way, I think this decision is very important for progress of wc-ng.
And I also think it has way too much impact to be decided on a lazy consensus
basis. So I'm *not* going to move forward with this before I've heard at
least a couple of affirmative opinions. Please consider taking the time
to think about this issue and let your opinion be heard. Thanks.


Re: SQLite and callbacks

2011-02-05 Thread Branko Čibej
On 05.02.2011 15:35, Stefan Sperling wrote:
 On Sat, Feb 05, 2011 at 03:29:23PM +0100, Stefan Sperling wrote:
 I think we should strongly consider revving affected callbacks in the
 1.7 API and document restriction they have to heed. Then we can bring
 the r1039808 code back. We can keep the code added in r1066541 for
 backwards-compatibility if desired.
 By the way, I think this decision is very important for progress of wc-ng.
 And I also think it has way too much impact to be decided on a lazy consensus
 basis. So I'm *not* going to move forward with this before I've heard at
 least a couple of affirmative opinions. Please consider taking the time
 to think about this issue and let your opinion be heard. Thanks.

In that case it would make sense to create a completely new set of APIs
(with a correspondingly new set of callbacks) with different
restrictions so that the implementation can be made fast, i.e., by doing
one query per working copy instead of one per directory or per-file.

I would not worry about existing clients -- simply mark the existing
APIs as deprecated, but keep them and do not attempt to improve their
performance. Clients that are being actively developed will move to the
new, faster infrastructure sooner rather than later. Those that are not
being maintained, or whose maintainers don't want to bother with
changing them, will remain bog slow and slowly fall by the wayside. We
should not be in the business of trying to improve other people's code
for them, only give them the tools to do so if they want to.

In short -- +1 to this approach. If it's not taken, then the whole wc-ng
effort will (almost) have been in vain as far as end users are concerned.

-- Brane


Re: SQLite and callbacks

2011-02-05 Thread Stefan Sperling
On Sat, Feb 05, 2011 at 06:47:35PM +0100, Branko Čibej wrote:
 I would not worry about existing clients -- simply mark the existing
 APIs as deprecated, but keep them and do not attempt to improve their
 performance.

Neglecting performance of backwards compat code is an interesting idea.
It allows us to focus on the new APIs first and foremost.

The existing APIs will continue to work using the node walker and issue
queries per node as they do now. We could consider optimising them later,
before or after 1.7 release. The required changes are mostly mechanical.


Re: SQLite and callbacks

2011-02-05 Thread Johan Corveleyn
On Sat, Feb 5, 2011 at 7:14 PM, Stefan Sperling s...@elego.de wrote:
 On Sat, Feb 05, 2011 at 06:47:35PM +0100, Branko Čibej wrote:
 I would not worry about existing clients -- simply mark the existing
 APIs as deprecated, but keep them and do not attempt to improve their
 performance.

 Neglecting performance of backwards compat code is an interesting idea.
 It allows us to focus on the new APIs first and foremost.

 The existing APIs will continue to work using the node walker and issue
 queries per node as they do now. We could consider optimising them later,
 before or after 1.7 release. The required changes are mostly mechanical.

I agree with most of what's been said here. I think it would be a pity
to use WC-NG in a way that provides far from optimal performance.

FWIW, I just did a quick run of your per-directory proplist query vs.
the per-node version, on my Windows XP platform, to have another data
point. The performance improvement is significant, but not
earth-shattering (around 20%).

Just tested with a freshly checked out working copy of svn trunk:

1) Per-node queries (r1066540). Looking at the third run, to make sure
everything is hot in cache:

$ time svn proplist -R . /dev/null

real0m1.532s
user0m0.015s
sys 0m0.015s


2) Per-dir queries (r1066541). Looking at the third run, to make sure
everything is hot in cache:

$ time svn proplist -R . /dev/null

real0m1.218s
user0m0.015s
sys 0m0.031s


3) For comparison, I also tested with SlikSVN 1.6.13. This is still
more than twice as fast:

$ time svn proplist -R . /dev/null

real0m0.578s
user0m0.015s
sys 0m0.046s


Cheers,
-- 
Johan


Re: SQLite and callbacks

2011-02-05 Thread Johan Corveleyn
On Sat, Feb 5, 2011 at 9:37 PM, Johan Corveleyn jcor...@gmail.com wrote:
 On Sat, Feb 5, 2011 at 7:14 PM, Stefan Sperling s...@elego.de wrote:
 On Sat, Feb 05, 2011 at 06:47:35PM +0100, Branko Čibej wrote:
 I would not worry about existing clients -- simply mark the existing
 APIs as deprecated, but keep them and do not attempt to improve their
 performance.

 Neglecting performance of backwards compat code is an interesting idea.
 It allows us to focus on the new APIs first and foremost.

 The existing APIs will continue to work using the node walker and issue
 queries per node as they do now. We could consider optimising them later,
 before or after 1.7 release. The required changes are mostly mechanical.

 I agree with most of what's been said here. I think it would be a pity
 to use WC-NG in a way that provides far from optimal performance.

 FWIW, I just did a quick run of your per-directory proplist query vs.
 the per-node version, on my Windows XP platform, to have another data
 point. The performance improvement is significant, but not
 earth-shattering (around 20%).

 Just tested with a freshly checked out working copy of svn trunk:

 1) Per-node queries (r1066540). Looking at the third run, to make sure
 everything is hot in cache:

 $ time svn proplist -R . /dev/null

 real    0m1.532s
 user    0m0.015s
 sys     0m0.015s


 2) Per-dir queries (r1066541). Looking at the third run, to make sure
 everything is hot in cache:

 $ time svn proplist -R . /dev/null

 real    0m1.218s
 user    0m0.015s
 sys     0m0.031s


 3) For comparison, I also tested with SlikSVN 1.6.13. This is still
 more than twice as fast:

 $ time svn proplist -R . /dev/null

 real    0m0.578s
 user    0m0.015s
 sys     0m0.046s

I should've added one more test, to put things in perspective :-),
namely the test with r1039808 (the per-wc query version):

$ time svn proplist -R . /dev/null

real0m0.328s
user0m0.015s
sys 0m0.031s

(but, as you probably already know, this execution is not cancellable,
which is pretty annoying :-), especially when I forget to redirect to
/dev/null, but let it write on the console).

Cheers,
-- 
Johan


Re: SQLite and callbacks

2011-02-03 Thread Daniel Shahaf
(test script attached --- hope it goes through)

Stefan Sperling wrote on Wed, Feb 02, 2011 at 18:53:39 +0100:
 I've made svn proplist issue per-directory queries in r1066541.
 Reviews of this change are most welcome.
 Also, if someone has a nice way of testing performance impacts of
 this change it would be interesting to see the results.
 I have not done any benchmarking myself yet.
 

Working copy of /tags/1.6.*, no local mods, r1066541:
[[[
== pristine-t1-1.out ==
233.55user 78.55system 5:12.08elapsed 100%CPU (0avgtext+0avgdata 
32176maxresident)k
0inputs+0outputs (0major+6199minor)pagefaults 0swaps

== pristine-t1-2.out ==
237.68user 79.22system 5:16.88elapsed 100%CPU (0avgtext+0avgdata 
32176maxresident)k
0inputs+0outputs (0major+6200minor)pagefaults 0swaps

== pristine-t1-3.out ==
232.96user 76.04system 5:08.98elapsed 100%CPU (0avgtext+0avgdata 
32176maxresident)k
0inputs+0outputs (0major+6197minor)pagefaults 0swaps
]]]

Ditto, r1066540:
[[[
== pristine-t2-1.out ==
253.41user 82.90system 5:36.27elapsed 100%CPU (0avgtext+0avgdata 
30480maxresident)k
0inputs+0outputs (0major+6099minor)pagefaults 0swaps

== pristine-t2-2.out ==
252.31user 82.52system 5:34.81elapsed 100%CPU (0avgtext+0avgdata 
30480maxresident)k
0inputs+0outputs (0major+6090minor)pagefaults 0swaps

== pristine-t2-3.out ==
234.95user 75.25system 5:10.15elapsed 100%CPU (0avgtext+0avgdata 
30480maxresident)k
0inputs+0outputs (0major+6092minor)pagefaults 0swaps
]]]

So, 5% more memory, but (excluding the t2-3 result) post-change is 7%
faster than post-change.


Ditto, with local prop mods on all *.c/*.py files:
[[[
== pset-Rpyc-t1-1.out ==
219.41user 67.76system 4:47.13elapsed 100%CPU (0avgtext+0avgdata 
32192maxresident)k
0inputs+0outputs (0major+6200minor)pagefaults 0swaps

== pset-Rpyc-t1-2.out ==
218.48user 65.74system 4:44.18elapsed 100%CPU (0avgtext+0avgdata 
32192maxresident)k
0inputs+0outputs (0major+6205minor)pagefaults 0swaps

== pset-Rpyc-t1-3.out ==
219.14user 67.83system 4:46.94elapsed 100%CPU (0avgtext+0avgdata 
32192maxresident)k
0inputs+0outputs (0major+6197minor)pagefaults 0swaps
]]]

Ditto, r1066540:
[[[
== pset-Rpyc-t2-1.out ==
217.34user 64.90system 4:42.20elapsed 100%CPU (0avgtext+0avgdata 
30480maxresident)k
0inputs+0outputs (0major+6097minor)pagefaults 0swaps

== pset-Rpyc-t2-2.out ==
215.01user 67.10system 4:42.08elapsed 100%CPU (0avgtext+0avgdata 
30464maxresident)k
0inputs+0outputs (0major+6097minor)pagefaults 0swaps

== pset-Rpyc-t2-3.out ==
212.82user 63.68system 4:36.46elapsed 100%CPU (0avgtext+0avgdata 
30480maxresident)k
0inputs+0outputs (0major+6094minor)pagefaults 0swaps
]]]

Pre-change is faster.


 Stefan

Daniel
(with thanks to svn-qavm)
#!/bin/sh -ex

dirs=$HOME/src/svn/t1 $HOME/src/svn/t2
wc=$1
nick=$2
[ $# -gt 2 ]  exit 1

for i in $dirs; do
  svn=$i/subversion/svn/svn
  $svn pl -qvR $wc /dev/null
  for j in 1 2 3; do
f=$nick-`basename $i`-$j.out
rm -f $f
time $svn pl -qvR $wc /dev/null 2$f
  done
done



Re: SQLite and callbacks

2011-02-02 Thread Stefan Sperling
On Thu, Jan 13, 2011 at 01:25:27PM +, Stefan Sperling wrote:
 This is about callbacks in the libsvn_client(!) and libsvn_wc APIs.
 Those are widely used by third parties so we should really make
 the right decision.
 
 Do we trust third parties to heed restrictions we place on callbacks?
 There is no way can enforce the restrictions. We can only document them
 and hope that third party developers play along. However, this would
 give us the best performance we can theoretically get (based on the
 assumption that the number of sqlite queries we run is a major
 limiting factor).
 
 Or we could decide not to run callbacks while sqlite transactions
 are in progress, and pay some performance overhead. We'd have to
 run multiple queries and provide data to the callback in chunks
 (of, say, directory-sized units). I'm not sure if this would provide
 adequate performance, but I haven't taken any measurements either (yet).
 A recent change of this kind I made to the node walker didn't make
 to make any notable difference.
 
 We may need both approaches anyway, if we decide to use the second
 approach in backwards-compat code.
 
 Opinions?

I've made svn proplist issue per-directory queries in r1066541.
Reviews of this change are most welcome.
Also, if someone has a nice way of testing performance impacts of
this change it would be interesting to see the results.
I have not done any benchmarking myself yet.

Stefan


Re: SQLite and callbacks

2011-01-13 Thread Stefan Sperling
On Wed, Jan 12, 2011 at 09:46:47AM -0500, C. Michael Pilato wrote:
 Nothing to add to the immediate discussion at this time.  Just wanted to
 note that we have essentially the same problems to deal with in the BDB
 backend.  The solution we've taken there is don't drive public API
 callbacks from inside BDB transactions.  It's not always the best for
 performance, but the FS API is such a high profile API for third-party
 consumers (very unlike the WC API in that respect, I'd imagine) that
 simplicity of the API rules was valued.
 

This is about callbacks in the libsvn_client(!) and libsvn_wc APIs.
Those are widely used by third parties so we should really make
the right decision.

Do we trust third parties to heed restrictions we place on callbacks?
There is no way can enforce the restrictions. We can only document them
and hope that third party developers play along. However, this would
give us the best performance we can theoretically get (based on the
assumption that the number of sqlite queries we run is a major
limiting factor).

Or we could decide not to run callbacks while sqlite transactions
are in progress, and pay some performance overhead. We'd have to
run multiple queries and provide data to the callback in chunks
(of, say, directory-sized units). I'm not sure if this would provide
adequate performance, but I haven't taken any measurements either (yet).
A recent change of this kind I made to the node walker didn't make
to make any notable difference.

We may need both approaches anyway, if we decide to use the second
approach in backwards-compat code.

Opinions?

Stefan


Re: SQLite and callbacks

2011-01-13 Thread C. Michael Pilato
On 01/13/2011 08:25 AM, Stefan Sperling wrote:
 On Wed, Jan 12, 2011 at 09:46:47AM -0500, C. Michael Pilato wrote:
 Nothing to add to the immediate discussion at this time.  Just wanted to
 note that we have essentially the same problems to deal with in the BDB
 backend.  The solution we've taken there is don't drive public API
 callbacks from inside BDB transactions.  It's not always the best for
 performance, but the FS API is such a high profile API for third-party
 consumers (very unlike the WC API in that respect, I'd imagine) that
 simplicity of the API rules was valued.

 
 This is about callbacks in the libsvn_client(!) and libsvn_wc APIs.
 Those are widely used by third parties so we should really make
 the right decision.
 
 Do we trust third parties to heed restrictions we place on callbacks?
 There is no way can enforce the restrictions.

Really?  Our BDB code detects re-entry from within a transaction and errors
out.  :-)  It does this by keeping a simple boolean state variable that is
set whenever the code enters a BDB transaction, and cleared when the
transaction is committed or aborted.  If that variable is set, and some
other code comes along to create a new (nested) transaction, and error is
raised.

Could we do something similar in our SQL logic?

 Or we could decide not to run callbacks while sqlite transactions
 are in progress, and pay some performance overhead. We'd have to
 run multiple queries and provide data to the callback in chunks
 (of, say, directory-sized units). I'm not sure if this would provide
 adequate performance, but I haven't taken any measurements either (yet).
 A recent change of this kind I made to the node walker didn't make
 to make any notable difference.

So, yeah, we really need more information to answer questions of
performance, it seems.  In the meantime, we need to answer questions of
correctness, maintainability, and ease-of-use.

 We may need both approaches anyway, if we decide to use the second
 approach in backwards-compat code.

I don't see how we'd ever need both approaches.  Can you explain?

-- 
C. Michael Pilato cmpil...@collab.net
CollabNet  www.collab.net  Distributed Development On Demand



signature.asc
Description: OpenPGP digital signature


Re: SQLite and callbacks

2011-01-13 Thread Stefan Sperling
On Thu, Jan 13, 2011 at 09:03:10AM -0500, C. Michael Pilato wrote:
 On 01/13/2011 08:25 AM, Stefan Sperling wrote:
  On Wed, Jan 12, 2011 at 09:46:47AM -0500, C. Michael Pilato wrote:
  Nothing to add to the immediate discussion at this time.  Just wanted to
  note that we have essentially the same problems to deal with in the BDB
  backend.  The solution we've taken there is don't drive public API
  callbacks from inside BDB transactions.  It's not always the best for
  performance, but the FS API is such a high profile API for third-party
  consumers (very unlike the WC API in that respect, I'd imagine) that
  simplicity of the API rules was valued.
 
  
  This is about callbacks in the libsvn_client(!) and libsvn_wc APIs.
  Those are widely used by third parties so we should really make
  the right decision.
  
  Do we trust third parties to heed restrictions we place on callbacks?
  There is no way can enforce the restrictions.
 
 Really?  Our BDB code detects re-entry from within a transaction and errors
 out.  :-)  It does this by keeping a simple boolean state variable that is
 set whenever the code enters a BDB transaction, and cleared when the
 transaction is committed or aborted.  If that variable is set, and some
 other code comes along to create a new (nested) transaction, and error is
 raised.
 
 Could we do something similar in our SQL logic?

This is a nice idea. I'll think about it.

  Or we could decide not to run callbacks while sqlite transactions
  are in progress, and pay some performance overhead. We'd have to
  run multiple queries and provide data to the callback in chunks
  (of, say, directory-sized units). I'm not sure if this would provide
  adequate performance, but I haven't taken any measurements either (yet).
  A recent change of this kind I made to the node walker didn't make
  to make any notable difference.
 
 So, yeah, we really need more information to answer questions of
 performance, it seems.  In the meantime, we need to answer questions of
 correctness, maintainability, and ease-of-use.

There is no question that calling callbacks while sqlite transactions
are running provides best performance. Instead of crawling a tree on
disk and running a query for each node (or a set of nodes), we can
simply walk the db and pass information to the callback.
This only works for read-only operations like proplist and status,
of course, but those are the ones that need to perform really well
so they can efficiently be used by GUIs, for instance.

  We may need both approaches anyway, if we decide to use the second
  approach in backwards-compat code.
 
 I don't see how we'd ever need both approaches.  Can you explain?

If we decide that the new approach violates our backwards compat rules,
we'd have to provide a separate implementation to continue supporting
semantics of old APIs.

Stefan


Re: SQLite and callbacks

2011-01-13 Thread C. Michael Pilato
On 01/13/2011 10:10 AM, Stefan Sperling wrote:
 If we decide that the new approach violates our backwards compat rules,
 we'd have to provide a separate implementation to continue supporting
 semantics of old APIs.

Ah, okay.  This is precise what I did in BDB recently with the
svn_fs_get_locks() API.  It's callback-ish, and the callbacks were being
driven from way deep in the our BDB table cursor-crawling code.  To make the
API usable to third-parties (who almost *always* want to do something FS-ish
with the lock information they get), I now -- just inside the API, squirrel
the lock information from the BDB layer away to a tempfile, then read the
tempfile back to drive the public callback system.  It's not lovely, it's
not fast, but it saves API consumers from having to essentially do the same
thing anyway.

-- 
C. Michael Pilato cmpil...@collab.net
CollabNet  www.collab.net  Distributed Development On Demand



signature.asc
Description: OpenPGP digital signature


Re: SQLite and callbacks

2011-01-12 Thread Stefan Sperling
On Tue, Nov 30, 2010 at 12:33:38PM +, Philip Martin wrote:
 Hyrum K. Wright hyrum_wri...@mail.utexas.edu writes:
 
  Stefan's patch to make a recursive proplist much more performant
  highlights the great benefit that our sqlite-backed storage can have.
  However, he reverted it due to concerns about the potential for
  database contention.  The theory was that the callback might try and
  call additional wc functions to get more information, and such nested
  statements weren't healthy for sqlite.  We talked about it for a bit
  in IRC this morning, and the picture raised by this issue was quite
  dire.
 
 It's not the nested statements that are the problem, it's extending the
 duration of the select.  The callback is to the application so it could
 be updating a GUI, interacting with the user, etc. and so could take a
 much greater length of time than other selects.  An SQLite read blocks
 SQlite writes by other threads/processes, so the long lived select may
 cause writes to fail that would otherwise succeed.  (We have a busy
 timeout of 10 seconds so that's the sort of time that becomes a
 problem.)
 
 It's related to the problem of querying the working copy while it is
 locked.  In 1.6 it was possible to run status, info, propget, etc. while
 an update was in progress.  In 1.7 that's no longer possible, all the
 read operations fail.  If we decide that this change in behaviour is
 acceptable then the long-lived select isn't really a problem, it simply
 stops the operations like update from starting.  If we want the 1.6
 behaviour then the long-lived select becomes more of a problem, as it
 makes update much more likely to fail part way through.
 
 The callback is not necessarily wrong, it depends what sort of behaviour
 we are trying to provide.

I think we should review our requirements for callbacks again.

recap

In 1.7 we have two layers of locking.
One is the sqlite locking, which guarantees meta data consistency in wc.db.
The other is working copy write locks (svn_wc__db_wclock_obtain())
which are obtained at the start of long-running operations which
modify the working copy (e.g. commit, update, merge, patch, ...).

The problem we're facing is that we want to allow read-only operations
(e.g. status, proplist, ...) to run concurrently with N other read-only
operations and at most one write operation.

For instance, the user might have TortoiseSVN and some IDE SVN plugin
running while working with the CLI client on the same working copy.
The CLI client should be able to perform write operations while the
other clients periodically poll the WC status to update their icons.

The WC write lock will not pose a problem here since it is only taken by
writers of which there must at most be one at a time (for a given subtree
of the WC). Readers won't attempt to acquire this lock.

The sqlite locks on the other hand creates a problem during wc.db queries.
A writer blocks all readers, and any active reader will block the writer
(see http://sqlite.org/atomiccommit.html).

Our solution to this problem is to keep the queries short, so that
sqlite locking will not cause long-running read or write operations
to stall or fail.

/recap

The problem with callbacks is that they can in theory take a long
amount of time to complete. So when running callbacks while an sqlite
query is running, the sqlite query could take a long time to complete.

We can solve this problem by never calling back to the client
while an sqlite transaction is in progress. However this has an impact
on performance. Even if retrieving the desired information is possible
with a single query, we have to run multiple queries to retrieve the
information in chunks (e.g. per directory) so we can pass it to the
callback.

The other option is to place restrictions on what a client callback
is allowed to do, depending on which locking context the callback runs in.
A locking context implies restrictions on what the callback is allowed to do. 

E.g. we could say that the proplist callback runs within a
wc.db locking context. This would be a highly restrictive context,
because an sqlite transaction is in progress.
The only action allowed is to consume the data passed in (e.g. print it or
store it to a temporary file or a buffer for later processing).
The callback should complete its task as quickly as possible to avoid
disturbing other SVN clients.
No calls to any function in libsvn_client or libsvn_wc would be allowed
from this context. If the callback doesn't comply with this restriction,
there is a risk of undefined behaviour of the entire system. This means
that a badly behaving client can disturb user experience.
But it also allows us to be maximally efficient.
If backwards compatibility is a concern, we could allow callbacks to
be called in this context only for API calls that exist in 1.7 and
newer. Backwards-compat code would use a different implementation with
a less restrictive locking context for these callbacks. This would allow
existing 

Re: SQLite and callbacks

2011-01-12 Thread C. Michael Pilato
Nothing to add to the immediate discussion at this time.  Just wanted to
note that we have essentially the same problems to deal with in the BDB
backend.  The solution we've taken there is don't drive public API
callbacks from inside BDB transactions.  It's not always the best for
performance, but the FS API is such a high profile API for third-party
consumers (very unlike the WC API in that respect, I'd imagine) that
simplicity of the API rules was valued.



signature.asc
Description: OpenPGP digital signature


Re: SQLite and callbacks

2010-11-30 Thread Philip Martin
Hyrum K. Wright hyrum_wri...@mail.utexas.edu writes:

 Stefan's patch to make a recursive proplist much more performant
 highlights the great benefit that our sqlite-backed storage can have.
 However, he reverted it due to concerns about the potential for
 database contention.  The theory was that the callback might try and
 call additional wc functions to get more information, and such nested
 statements weren't healthy for sqlite.  We talked about it for a bit
 in IRC this morning, and the picture raised by this issue was quite
 dire.

It's not the nested statements that are the problem, it's extending the
duration of the select.  The callback is to the application so it could
be updating a GUI, interacting with the user, etc. and so could take a
much greater length of time than other selects.  An SQLite read blocks
SQlite writes by other threads/processes, so the long lived select may
cause writes to fail that would otherwise succeed.  (We have a busy
timeout of 10 seconds so that's the sort of time that becomes a
problem.)

It's related to the problem of querying the working copy while it is
locked.  In 1.6 it was possible to run status, info, propget, etc. while
an update was in progress.  In 1.7 that's no longer possible, all the
read operations fail.  If we decide that this change in behaviour is
acceptable then the long-lived select isn't really a problem, it simply
stops the operations like update from starting.  If we want the 1.6
behaviour then the long-lived select becomes more of a problem, as it
makes update much more likely to fail part way through.

The callback is not necessarily wrong, it depends what sort of behaviour
we are trying to provide.

-- 
Philip


Re: SQLite and callbacks

2010-11-30 Thread Stefan Sperling
On Mon, Nov 29, 2010 at 07:43:40PM -0600, Hyrum K. Wright wrote:
 We use callbacks extensively throughout our code as a means of
 providing streamy feedback to callers.  It's a pretty good paradigm,
 and one that has served us well.  We don't put many restrictions on
 what the callbacks can do in terms of fetching more information or
 calling other functions.
 
 Enter wc-ng.
 
 Stefan's patch to make a recursive proplist much more performant
 highlights the great benefit that our sqlite-backed storage can have.
 However, he reverted it due to concerns about the potential for
 database contention.  The theory was that the callback might try and
 call additional wc functions to get more information, and such nested
 statements weren't healthy for sqlite.  We talked about it for a bit
 in IRC this morning, and the picture raised by this issue was quite
 dire.
 
 In an attempt to find out what the consequences of these nested
 queries are, I wrote a test program to attempt to demonstrate the
 failure, only now I can't seem to do so.  Attached is the test
 program, but when I run it, I'm able to successfully execute multiple
 prepared statements on the same set of rows simultaneously, which was
 the concern we had about our callback mechanism in sqlite.
 
 So is this a valid problem?  If so, could somebody use the attached
 test program to illustrate it for those of us who may not fully
 understand the situation?

If you run this version of your test, it will run in an endless loop.
The callback inserts new values, the caller will see those and invoke
the callback again.

We need a way to prevent writes done within the callback from being
visible to the caller.


#include stdio.h
#include string.h

#include sqlite3.h

#define CHECK_ERR  \
  if (sqlite3_errcode(db) \
 (sqlite3_errcode(db) != SQLITE_ROW) \
 (sqlite3_errcode(db) != SQLITE_DONE))  \
fprintf(stderr, %d: %d: %s\n, __LINE__, sqlite3_errcode(db), 
sqlite3_errmsg(db));

#define TEST_DATA \
  create table foo (num int, message text);   \
   \
  insert into foo values (1, 'A is for Allegator');  \
  insert into foo values (2, 'B is for Bayou');  \
  insert into foo values (3, 'C is for Cyprus Trees');  \
  insert into foo values (4, 'D is for Dew');  \
  insert into foo values (5, 'E is for Everything like');  \
  insert into foo values (6, 'F Ferns or');  \
  insert into foo values (7, 'G Grass that''s');  \
  insert into foo values (8, 'H Home to you');  \
  

void callback(sqlite3 *db, int num)
{
  const char *query = insert into foo values (?1,?2);;
  sqlite3_stmt *stmt;
  const unsigned char *msg = new message for you!;

  printf(Got number: %d\n, num);

  sqlite3_prepare_v2(db, query, -1, stmt, NULL);
  CHECK_ERR;

  sqlite3_bind_int(stmt, 1, 42);
  sqlite3_bind_text(stmt, 2, msg, strlen(msg) + 1, SQLITE_STATIC);
  CHECK_ERR;

  sqlite3_step(stmt);
  CHECK_ERR;

  sqlite3_finalize(stmt);
  CHECK_ERR;
}

void get_numbers(sqlite3 *db,
 void (*callback)(sqlite3 *, int))
{
  const char *query = select num from foo;;
  sqlite3_stmt *stmt;
  int code;

  sqlite3_prepare_v2(db, query, -1, stmt, NULL);
  CHECK_ERR;

  code = sqlite3_step(stmt);
  CHECK_ERR;
  while (code == SQLITE_ROW)
{
  int number = sqlite3_column_int(stmt, 0);
  callback(db, number);

  code = sqlite3_step(stmt);
  CHECK_ERR;
}

  sqlite3_finalize(stmt);
  CHECK_ERR;
}


int
main(int argc, char *argv[])
{
  sqlite3 *db;

  remove(test.db);

  sqlite3_open_v2(test.db, db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE,
  NULL);
  CHECK_ERR;

  sqlite3_extended_result_codes(db, 1);
  CHECK_ERR;

  sqlite3_exec(db, TEST_DATA, NULL, NULL, NULL);
  CHECK_ERR;

  get_numbers(db, callback);

  sqlite3_close(db);

  return 0;
}



Re: SQLite and callbacks

2010-11-30 Thread Stefan Sperling
On Tue, Nov 30, 2010 at 01:32:27PM +, Philip Martin wrote:
 Stefan Sperling s...@elego.de writes:
 
  If you run this version of your test, it will run in an endless loop.
  The callback inserts new values, the caller will see those and invoke
  the callback again.
 
 Hmm, I expected the select to block the write causing it to return
 SQLITE_BUSY.  That is what happens if the write is done by a separate
 process, but apparently not when it's the same process (or maybe it's
 using the same database handle that makes it work).

With a different db handle, the callback cannot insert values into the db:

$ ./test
Got number: 1
45: 5: database is locked
48: 5: database is locked
Got number: 2
45: 5: database is locked
48: 5: database is locked
Got number: 3
45: 5: database is locked
48: 5: database is locked
Got number: 4
45: 5: database is locked
48: 5: database is locked
Got number: 5
45: 5: database is locked
48: 5: database is locked
Got number: 6
45: 5: database is locked
48: 5: database is locked
Got number: 7
45: 5: database is locked
48: 5: database is locked
Got number: 8
45: 5: database is locked
48: 5: database is locked


#include stdio.h
#include string.h

#include sqlite3.h

#define CHECK_ERR  \
  if (sqlite3_errcode(db) \
 (sqlite3_errcode(db) != SQLITE_ROW) \
 (sqlite3_errcode(db) != SQLITE_DONE))  \
fprintf(stderr, %d: %d: %s\n, __LINE__, sqlite3_errcode(db), 
sqlite3_errmsg(db));

#define TEST_DATA \
  create table foo (num int, message text);   \
   \
  insert into foo values (1, 'A is for Allegator');  \
  insert into foo values (2, 'B is for Bayou');  \
  insert into foo values (3, 'C is for Cyprus Trees');  \
  insert into foo values (4, 'D is for Dew');  \
  insert into foo values (5, 'E is for Everything like');  \
  insert into foo values (6, 'F Ferns or');  \
  insert into foo values (7, 'G Grass that''s');  \
  insert into foo values (8, 'H Home to you');  \
  

void callback(int num)
{
  const char *query = insert into foo values (?1,?2);;
  sqlite3_stmt *stmt;
  const unsigned char *msg = new message for you!;
  sqlite3 *db;

  sqlite3_open_v2(test.db, db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE,
  NULL);

  printf(Got number: %d\n, num);

  sqlite3_prepare_v2(db, query, -1, stmt, NULL);
  CHECK_ERR;

  sqlite3_bind_int(stmt, 1, 42);
  sqlite3_bind_text(stmt, 2, msg, strlen(msg) + 1, SQLITE_STATIC);
  CHECK_ERR;

  sqlite3_step(stmt);
  CHECK_ERR;

  sqlite3_finalize(stmt);
  CHECK_ERR;

  sqlite3_close(db);
}

void get_numbers(sqlite3 *db,
 void (*callback)(int))
{
  const char *query = select num from foo;;
  sqlite3_stmt *stmt;
  int code;

  sqlite3_prepare_v2(db, query, -1, stmt, NULL);
  CHECK_ERR;

  code = sqlite3_step(stmt);
  CHECK_ERR;
  while (code == SQLITE_ROW)
{
  int number = sqlite3_column_int(stmt, 0);
  callback(number);

  code = sqlite3_step(stmt);
  CHECK_ERR;
}

  sqlite3_finalize(stmt);
  CHECK_ERR;
}


int
main(int argc, char *argv[])
{
  sqlite3 *db;

  remove(test.db);

  sqlite3_open_v2(test.db, db, SQLITE_OPEN_READWRITE | SQLITE_OPEN_CREATE,
  NULL);
  CHECK_ERR;

  sqlite3_extended_result_codes(db, 1);
  CHECK_ERR;

  sqlite3_exec(db, TEST_DATA, NULL, NULL, NULL);
  CHECK_ERR;

  get_numbers(db, callback);

  sqlite3_close(db);

  return 0;
}