Introduce a #leaf{} record in revision trees

This is substantially based on work by Bob Dionne (a452a4a) to introduce
the data size calculations at Cloudant. There's quite a bit of conflict,
in both code and actual behavior, between this work and what Filipe wrote
for CouchDB. This new record should ease the transition to merging both
behaviors.

An important thing to note is that this record only ever lives in RAM and
is never written to disk, so we don't have to worry about record upgrades.
We will, however, still have to maintain the upgrade info that Filipe and
Bob both introduced (which is fairly straightforward).


Project: http://git-wip-us.apache.org/repos/asf/couchdb-couch/repo
Commit: http://git-wip-us.apache.org/repos/asf/couchdb-couch/commit/85cf2b26
Tree: http://git-wip-us.apache.org/repos/asf/couchdb-couch/tree/85cf2b26
Diff: http://git-wip-us.apache.org/repos/asf/couchdb-couch/diff/85cf2b26

Branch: refs/heads/import
Commit: 85cf2b26e52135b75135587cc553abb635c2a2c2
Parents: dc5a6de
Author: Robert Newson <[email protected]>
Authored: Sun Mar 10 16:12:28 2013 -0500
Committer: Paul J. Davis <[email protected]>
Committed: Fri Jan 17 16:44:31 2014 -0800

----------------------------------------------------------------------
 include/couch_db.hrl     |  9 +++++++++
 src/couch_db.erl         | 22 +++++++---------------
 src/couch_db_updater.erl | 40 +++++++++++++++++-----------------------
 src/couch_doc.erl        | 13 ++++++++-----
 src/couch_util.erl       | 11 +++++++++++
 5 files changed, 52 insertions(+), 43 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/include/couch_db.hrl
----------------------------------------------------------------------
diff --git a/include/couch_db.hrl b/include/couch_db.hrl
index 77006e4..61a59f7 100644
--- a/include/couch_db.hrl
+++ b/include/couch_db.hrl
@@ -23,6 +23,8 @@
 
 -define(b2l(V), binary_to_list(V)).
 -define(l2b(V), list_to_binary(V)).
+-define(i2b(V), couch_util:integer_to_boolean(V)).
+-define(b2i(V), couch_util:boolean_to_integer(V)).
 -define(term_to_bin(T), term_to_binary(T, [{minor_version, 1}])).
 -define(term_size(T),
     try
@@ -271,3 +273,10 @@
     stop_fun
 }).
 
+-record(leaf,  {
+    deleted,
+    ptr,
+    seq,
+    size = nil
+}).
+

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_db.erl
----------------------------------------------------------------------
diff --git a/src/couch_db.erl b/src/couch_db.erl
index 7734c7c..e4e8cca 100644
--- a/src/couch_db.erl
+++ b/src/couch_db.erl
@@ -580,7 +580,7 @@ prep_and_validate_update(Db, #doc{id=Id,revs={RevStart, 
Revs}}=Doc,
     case Revs of
     [PrevRev|_] ->
         case dict:find({RevStart, PrevRev}, LeafRevsDict) of
-        {ok, {Deleted, DiskSp, DiskRevs}} ->
+        {ok, {#leaf{deleted=Deleted, ptr=DiskSp}, DiskRevs}} ->
             case couch_doc:has_stubs(Doc) of
             true ->
                 DiskDoc = make_doc(Db, Id, Deleted, DiskSp, DiskRevs),
@@ -643,12 +643,8 @@ prep_and_validate_updates(Db, [DocBucket|RestBuckets],
         AllowConflict, AccPrepped, AccErrors) ->
     Leafs = couch_key_tree:get_all_leafs(OldRevTree),
     LeafRevsDict = dict:from_list([
-        begin
-            Deleted = element(1, LeafVal),
-            Sp = element(2, LeafVal),
-            {{Start, RevId}, {Deleted, Sp, Revs}}
-        end ||
-        {LeafVal, {Start, [RevId | _]} = Revs} <- Leafs
+        {{Start, RevId}, {Leaf, Revs}} ||
+        {Leaf, {Start, [RevId | _]} = Revs} <- Leafs
     ]),
     {PreppedBucket, AccErrors3} = lists:foldl(
         fun({Doc, Ref}, {Docs2Acc, AccErrors2}) ->
@@ -895,9 +891,7 @@ make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #doc{}} | 
RestPath]) ->
     make_first_doc_on_disk(Db, Id, Pos-1, RestPath);
 make_first_doc_on_disk(Db, Id, Pos, [{_Rev, ?REV_MISSING}|RestPath]) ->
     make_first_doc_on_disk(Db, Id, Pos - 1, RestPath);
-make_first_doc_on_disk(Db, Id, Pos, [{_Rev, RevValue} |_]=DocPath) ->
-    IsDel = element(1, RevValue),
-    Sp = element(2, RevValue),
+make_first_doc_on_disk(Db, Id, Pos, [{_Rev, #leaf{deleted=IsDel, ptr=Sp}} 
|_]=DocPath) ->
     Revs = [Rev || {Rev, _} <- DocPath],
     make_doc(Db, Id, IsDel, Sp, {Pos, Revs}).
 
@@ -1243,9 +1237,7 @@ open_doc_revs_int(Db, IdRevs, Options) ->
                     ?REV_MISSING ->
                         % we have the rev in our list but know nothing about it
                         {{not_found, missing}, {Pos, Rev}};
-                    RevValue ->
-                        IsDeleted = element(1, RevValue),
-                        SummaryPtr = element(2, RevValue),
+                    #leaf{deleted=IsDeleted, ptr=SummaryPtr} ->
                         {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, 
FoundRevPath)}
                     end
                 end, FoundRevs),
@@ -1297,8 +1289,8 @@ 
doc_meta_info(#doc_info{high_seq=Seq,revs=[#rev_info{rev=Rev}|RestInfo]}, RevTre
         [{revs_info, Pos, lists:map(
             fun({Rev1, ?REV_MISSING}) ->
                 {Rev1, missing};
-            ({Rev1, RevValue}) ->
-                case element(1, RevValue) of
+            ({Rev1, Leaf}) ->
+                case Leaf#leaf.deleted of
                 true ->
                     {Rev1, deleted};
                 false ->

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_db_updater.erl
----------------------------------------------------------------------
diff --git a/src/couch_db_updater.erl b/src/couch_db_updater.erl
index c64911e..cc48ef8 100644
--- a/src/couch_db_updater.erl
+++ b/src/couch_db_updater.erl
@@ -158,10 +158,8 @@ handle_call({purge_docs, IdRevs}, _From, Db) ->
     {DocInfoToUpdate, NewSeq} = lists:mapfoldl(
         fun(#full_doc_info{rev_tree=Tree}=FullInfo, SeqAcc) ->
             Tree2 = couch_key_tree:map_leafs(
-                fun(_RevId, LeafVal) ->
-                    IsDeleted = element(1, LeafVal),
-                    BodyPointer = element(2, LeafVal),
-                    {IsDeleted, BodyPointer, SeqAcc + 1}
+                fun(_RevId, Leaf) ->
+                    Leaf#leaf{seq=SeqAcc+1}
                 end, Tree),
             {FullInfo#full_doc_info{rev_tree=Tree2}, SeqAcc + 1}
         end, LastSeq, FullDocInfoToUpdate),
@@ -340,37 +338,35 @@ rev_tree(DiskTree) ->
     couch_key_tree:mapfold(fun
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, leaf, _Acc) ->
             % pre 1.2 format, will be upgraded on compaction
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, nil};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, 
nil};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq}, branch, Acc) ->
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, nil}, Acc};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq}, 
Acc};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, leaf, Acc) ->
             Acc2 = sum_leaf_sizes(Acc, Size),
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc2};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, 
size=Size}, Acc2};
         (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}, branch, Acc) ->
-            {{IsDeleted == 1, BodyPointer, UpdateSeq, Size}, Acc};
+            {#leaf{deleted=?i2b(IsDeleted), ptr=BodyPointer, seq=UpdateSeq, 
size=Size}, Acc};
         (_RevId, ?REV_MISSING, _Type, Acc) ->
             {?REV_MISSING, Acc}
-    end, DiskTree).
+    end, 0, DiskTree).
 
 disk_tree(RevTree) ->
     couch_key_tree:map(fun
         (_RevId, ?REV_MISSING) ->
             ?REV_MISSING;
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq}) ->
-            {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, nil};
-        (_RevId, {IsDeleted, BodyPointer, UpdateSeq, Size}) ->
-            {if IsDeleted -> 1; true -> 0 end, BodyPointer, UpdateSeq, Size}
+        (_RevId, #leaf{deleted=IsDeleted, ptr=BodyPointer, seq=UpdateSeq, 
size=Size}) ->
+            {?b2i(IsDeleted), BodyPointer, UpdateSeq, Size}
     end, RevTree).
 
 btree_by_seq_split(#full_doc_info{id=Id, update_seq=Seq, deleted=Del, 
rev_tree=T}) ->
-    {Seq, {Id, if Del -> 1; true -> 0 end, disk_tree(T)}}.
+    {Seq, {Id, ?b2i(Del), disk_tree(T)}}.
 
 btree_by_seq_join(Seq, {Id, Del, DiskTree}) when is_integer(Del) ->
     {RevTree, LeafsSize} = rev_tree(DiskTree),
     #full_doc_info{
         id = Id,
         update_seq = Seq,
-        deleted = (Del == 1),
+        deleted = ?i2b(Del),
         rev_tree = RevTree,
         leafs_size = LeafsSize
     };
@@ -388,14 +384,14 @@ btree_by_seq_join(KeySeq, {Id, RevInfos, 
DeletedRevInfos}) ->
 
 btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
         deleted=Deleted, rev_tree=Tree}) ->
-    {Id, {Seq, if Deleted -> 1; true -> 0 end, disk_tree(Tree)}}.
+    {Id, {Seq, ?b2i(Deleted), disk_tree(Tree)}}.
 
 btree_by_id_join(Id, {HighSeq, Deleted, DiskTree}) ->
     {Tree, LeafsSize} = rev_tree(DiskTree),
     #full_doc_info{
         id = Id,
         update_seq = HighSeq,
-        deleted = (Deleted == 1),
+        deleted = ?i2b(Deleted),
         rev_tree = Tree,
         leafs_size = LeafsSize
     }.
@@ -573,7 +569,8 @@ flush_trees(#db{fd = Fd} = Db,
                 TotalSize = lists:foldl(
                     fun(#att{att_len = L}, A) -> A + L end,
                     SummarySize, Value#doc.atts),
-                NewValue = {IsDeleted, NewSummaryPointer, UpdateSeq, 
TotalSize},
+                NewValue = #leaf{deleted=IsDeleted, ptr=NewSummaryPointer,
+                                 seq=UpdateSeq, size=TotalSize},
                 case Type of
                 leaf ->
                     {NewValue, Acc + TotalSize};
@@ -899,10 +896,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) 
->
             Info#full_doc_info{rev_tree=couch_key_tree:map(
                 fun(_, _, branch) ->
                     ?REV_MISSING;
-                (_Rev, LeafVal, leaf) ->
-                    IsDel = element(1, LeafVal),
-                    Sp = element(2, LeafVal),
-                    Seq = element(3, LeafVal),
+                (_Rev, #leaf{ptr=Sp}=Leaf, leaf) ->
                     {_Body, AttsInfo} = Summary = copy_doc_attachments(
                         Db, Sp, DestFd),
                     SummaryChunk = make_doc_summary(NewDb, Summary),
@@ -911,7 +905,7 @@ copy_docs(Db, #db{fd = DestFd} = NewDb, MixedInfos, Retry) 
->
                     TotalLeafSize = lists:foldl(
                         fun({_, _, _, AttLen, _, _, _, _}, S) -> S + AttLen 
end,
                         SummarySize, AttsInfo),
-                    {IsDel, Pos, Seq, TotalLeafSize}
+                    Leaf#leaf{ptr=Pos, size=TotalLeafSize}
                 end, RevTree)}
         end, NewInfos0),
 

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_doc.erl
----------------------------------------------------------------------
diff --git a/src/couch_doc.erl b/src/couch_doc.erl
index bcf79d2..6f2ca9b 100644
--- a/src/couch_doc.erl
+++ b/src/couch_doc.erl
@@ -330,7 +330,10 @@ max_seq(Tree, UpdateSeq) ->
             {_Deleted, _DiskPos, OldTreeSeq} ->
                 % Older versions didn't track data sizes.
                 erlang:max(MaxOldSeq, OldTreeSeq);
-            {_Deleted, _DiskPos, OldTreeSeq, _Size} ->
+            {_Deleted, _DiskPos, OldTreeSeq, _Size} -> % necessary clause?
+                % Older versions didn't store #leaf records.
+                erlang:max(MaxOldSeq, OldTreeSeq);
+            #leaf{seq=OldTreeSeq} ->
                 erlang:max(MaxOldSeq, OldTreeSeq);
             _ ->
                 MaxOldSeq
@@ -341,11 +344,11 @@ max_seq(Tree, UpdateSeq) ->
 to_doc_info_path(#full_doc_info{id=Id,rev_tree=Tree,update_seq=FDISeq}) ->
     RevInfosAndPath = [
         {#rev_info{
-            deleted = element(1, LeafVal),
-            body_sp = element(2, LeafVal),
-            seq = element(3, LeafVal),
+            deleted = Leaf#leaf.deleted,
+            body_sp = Leaf#leaf.ptr,
+            seq = Leaf#leaf.seq,
             rev = {Pos, RevId}
-        }, Path} || {LeafVal, {Pos, [RevId | _]} = Path} <-
+        }, Path} || {Leaf, {Pos, [RevId | _]} = Path} <-
             couch_key_tree:get_all_leafs(Tree)
     ],
     SortedRevInfosAndPath = lists:sort(

http://git-wip-us.apache.org/repos/asf/couchdb-couch/blob/85cf2b26/src/couch_util.erl
----------------------------------------------------------------------
diff --git a/src/couch_util.erl b/src/couch_util.erl
index 3556d36..d09211a 100644
--- a/src/couch_util.erl
+++ b/src/couch_util.erl
@@ -29,6 +29,7 @@
 -export([encode_doc_id/1]).
 -export([with_db/2]).
 -export([rfc1123_date/0, rfc1123_date/1]).
+-export([integer_to_boolean/1, boolean_to_integer/1]).
 
 -include_lib("couch/include/couch_db.hrl").
 
@@ -487,3 +488,13 @@ month(9) -> "Sep";
 month(10) -> "Oct";
 month(11) -> "Nov";
 month(12) -> "Dec".
+
+integer_to_boolean(1) ->
+    true;
+integer_to_boolean(0) ->
+    false.
+
+boolean_to_integer(true) ->
+    1;
+boolean_to_integer(false) ->
+    0.

Reply via email to