This is an automated email from the ASF dual-hosted git repository. davisp pushed a commit to branch prototype/views in repository https://gitbox.apache.org/repos/asf/couchdb.git
commit 6481b98cb42471c276d7c60489f582a7893b800f Author: Paul J. Davis <[email protected]> AuthorDate: Tue Jul 23 16:02:29 2019 -0500 Store data sizes for views --- src/couch_views/include/couch_views.hrl | 1 + src/couch_views/src/couch_views_fdb.erl | 58 +++++++++++++++++++---- src/couch_views/test/couch_views_indexer_test.erl | 10 ++-- 3 files changed, 54 insertions(+), 15 deletions(-) diff --git a/src/couch_views/include/couch_views.hrl b/src/couch_views/include/couch_views.hrl index 525f62f..2e443eb 100644 --- a/src/couch_views/include/couch_views.hrl +++ b/src/couch_views/include/couch_views.hrl @@ -17,6 +17,7 @@ -define(VIEW_MAP_RANGE, 3). -define(VIEW_ROW_COUNT, 0). +-define(VIEW_KV_SIZE, 1). -define(VIEW_ROW_KEY, 0). -define(VIEW_ROW_VALUE, 1). diff --git a/src/couch_views/src/couch_views_fdb.erl b/src/couch_views/src/couch_views_fdb.erl index a0e4bd1..7a2d7e3 100644 --- a/src/couch_views/src/couch_views_fdb.erl +++ b/src/couch_views/src/couch_views_fdb.erl @@ -17,6 +17,7 @@ set_update_seq/3, get_row_count/3, + get_kv_size/3, fold_map_idx/6, @@ -73,6 +74,18 @@ get_row_count(TxDb, #mrst{sig = Sig}, ViewId) -> end. +get_kv_size(TxDb, #mrst{sig = Sig}, ViewId) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + + case erlfdb:wait(erlfdb:get(Tx, kv_size_key(DbPrefix, Sig, ViewId))) of + not_found -> 0; % Can this happen? + SizeBin -> ?bin2uint(SizeBin) + end. + + fold_map_idx(TxDb, Sig, ViewId, Options, Callback, Acc0) -> #{ db_prefix := DbPrefix @@ -121,9 +134,10 @@ write_doc(TxDb, Sig, _ViewIds, #{deleted := true} = Doc) -> ExistingViewKeys = get_view_keys(TxDb, Sig, DocId), clear_id_idx(TxDb, Sig, DocId), - lists:foreach(fun({ViewId, TotalKeys, UniqueKeys}) -> + lists:foreach(fun({ViewId, TotalKeys, TotalSize, UniqueKeys}) -> clear_map_idx(TxDb, Sig, ViewId, DocId, UniqueKeys), - update_row_count(TxDb, Sig, ViewId, -TotalKeys) + update_row_count(TxDb, Sig, ViewId, -TotalKeys), + update_kv_size(TxDb, Sig, ViewId, -TotalSize) end, ExistingViewKeys); write_doc(TxDb, Sig, ViewIds, Doc) -> @@ -140,12 +154,17 @@ write_doc(TxDb, Sig, ViewIds, Doc) -> update_id_idx(TxDb, Sig, ViewId, DocId, NewRows), ExistingKeys = case lists:keyfind(ViewId, 1, ExistingViewKeys) of - {ViewId, TotalRows, EKeys} -> - Change = length(NewRows) - TotalRows, - update_row_count(TxDb, Sig, ViewId, Change), + {ViewId, TotalRows, TotalSize, EKeys} -> + RowChange = length(NewRows) - TotalRows, + SizeChange = calculate_row_size(NewRows) - TotalSize, + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), EKeys; false -> - update_row_count(TxDb, Sig, ViewId, length(NewRows)), + RowChange = length(NewRows), + SizeChange = calculate_row_size(NewRows), + update_row_count(TxDb, Sig, ViewId, RowChange), + update_kv_size(TxDb, Sig, ViewId, SizeChange), [] end, update_map_idx(TxDb, Sig, ViewId, DocId, ExistingKeys, NewRows) @@ -275,7 +294,8 @@ update_id_idx(TxDb, Sig, ViewId, DocId, NewRows) -> couch_log:error("Updating ID index: ~p ~p ~p ~p", [ViewId, DocId, NewRows, Unique]), Key = id_idx_key(DbPrefix, Sig, DocId, ViewId), - Val = couch_views_encoding:encode([length(NewRows), Unique]), + RowSize = calculate_row_size(NewRows), + Val = couch_views_encoding:encode([length(NewRows), RowSize, Unique]), ok = erlfdb:set(Tx, Key, Val). @@ -313,8 +333,8 @@ get_view_keys(TxDb, Sig, DocId) -> lists:map(fun({K, V}) -> {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId, ViewId} = erlfdb_tuple:unpack(K, DbPrefix), - [TotalKeys, UniqueKeys] = couch_views_encoding:decode(V), - {ViewId, TotalKeys, UniqueKeys} + [TotalKeys, TotalSize, UniqueKeys] = couch_views_encoding:decode(V), + {ViewId, TotalKeys, TotalSize, UniqueKeys} end, erlfdb:get_range(Tx, Start, End, [])). @@ -327,6 +347,15 @@ update_row_count(TxDb, Sig, ViewId, Increment) -> erlfdb:add(Tx, Key, Increment). +update_kv_size(TxDb, Sig, ViewId, Increment) -> + #{ + tx := Tx, + db_prefix := DbPrefix + } = TxDb, + Key = kv_size_key(DbPrefix, Sig, ViewId), + erlfdb:add(Tx, Key, Increment). + + seq_key(DbPrefix, Sig) -> Key = {?DB_VIEWS, Sig, ?VIEW_UPDATE_SEQ}, erlfdb_tuple:pack(Key, DbPrefix). @@ -337,6 +366,11 @@ row_count_key(DbPrefix, Sig, ViewId) -> erlfdb_tuple:pack(Key, DbPrefix). +kv_size_key(DbPrefix, Sig, ViewId) -> + Key = {?DB_VIEWS, Sig, ?VIEW_ID_INFO, ViewId, ?VIEW_KV_SIZE}, + erlfdb_tuple:pack(Key, DbPrefix). + + id_idx_key(DbPrefix, Sig, DocId, ViewId) -> Key = {?DB_VIEWS, Sig, ?VIEW_ID_RANGE, DocId, ViewId}, erlfdb_tuple:pack(Key, DbPrefix). @@ -381,3 +415,9 @@ process_rows(Rows) -> end, {0, []}, Vals), Labeled ++ DAcc end, [], Grouped). + + +calculate_row_size(Rows) -> + lists:foldl(fun({K, V}, Acc) -> + Acc + erlang:external_size(K) + erlang:external_size(V) + end, 0, Rows). diff --git a/src/couch_views/test/couch_views_indexer_test.erl b/src/couch_views/test/couch_views_indexer_test.erl index fa0d99e..9218064 100644 --- a/src/couch_views/test/couch_views_indexer_test.erl +++ b/src/couch_views/test/couch_views_indexer_test.erl @@ -157,10 +157,9 @@ updated_docs_are_reindexed(Db) -> DbName = fabric2_db:name(Db), {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), Sig = Mrst#mrst.sig, - Expect = [{0, 1, [1]}, {1, 0, []}], fabric2_fdb:transactional(Db, fun(TxDb) -> - ?assertEqual( - Expect, + ?assertMatch( + [{0, 1, _, [1]}, {1, 0, 0, []}], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) ) end). @@ -214,10 +213,9 @@ updated_docs_without_changes_are_reindexed(Db) -> DbName = fabric2_db:name(Db), {ok, Mrst} = couch_views_util:ddoc_to_mrst(DbName, DDoc), Sig = Mrst#mrst.sig, - Expect = [{0, 1, [0]}, {1, 0, []}], fabric2_fdb:transactional(Db, fun(TxDb) -> - ?assertEqual( - Expect, + ?assertMatch( + [{0, 1, _, [0]}, {1, 0, 0, []}], couch_views_fdb:get_view_keys(TxDb, Sig, <<"0">>) ) end).
