davisp commented on a change in pull request #2638: Prototype/fdb layer couch 
views size tests
URL: https://github.com/apache/couchdb/pull/2638#discussion_r388512954
 
 

 ##########
 File path: src/couch_views/test/couch_views_size_test.erl
 ##########
 @@ -0,0 +1,562 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_views_size_test).
+
+-include_lib("eunit/include/eunit.hrl").
+-include_lib("couch/include/couch_db.hrl").
+-include_lib("couch/include/couch_eunit.hrl").
+-include_lib("couch_mrview/include/couch_mrview.hrl").
+-include_lib("fabric/include/fabric2.hrl").
+-include_lib("couch_views/include/couch_views.hrl").
+
+% N.B., we should move to couch_ejson_size instead
+% of erlang:external_size
+%
+% to calculate view size:
+% total = 0
+% for (fdb_k, fdb_v) in VIEW_MAP_RANGE:
+%   {EncUserKey, EncUserval} = erlfdb_tuple:unpack(fdb_v),
+%   UserKey = couch_views_encoding:decode(EncUserKey),
+%   UserVal = couch_views_encoding:decode(EncUserVal),
+%   total += erlang:external_size(UserKey),
+%   total += erlang:external_size(UserVal)
+%
+% Our goal in checking the size calculations is that we cover
+% as much of the possible key mutation space as possible while
+% not relying on fuzzing out the edge cases. Conceptually we have
+% two sets of keys E and U. E is keys as currently exist in the
+% view, and U is the new set of keys corresponding to an update.
+%
+% Both sets E and U have the same possible set of state variables:
+%
+% 1. N unique keys, where 0 =< N =< infinity
+% 2. D keys with duplicates, where 0 =< D =< N,
+% 3. R repeats for each member of D, for 2 =< R =< infinity
+%
+% Given two sets S1 and S2, we then have a set of transition variables:
+%
+% 1. deltaN - shared unique keys, where 0 =< deltaN =< N
+% 2. deltaD - shared duplicates, where 0 =< deltaD =< N
+% 3. deltaR - shared repeats for each D, where 2 =< deltaR =< infinity
+%
+% To search our state transition space, we can create two functions to
+% first define our start and end states, and for each transition we have
+% a function that defines the shared overlap between states.
+%
+% Given a list of transitions are checks then become simple in that
+% we can iterate over each transition checking that our index is valid
+% after each one. Index validation will purely look at the existing
+% state of the index in fdb and validate correctness.
+
+-define(NUM_SINGLE_TESTS, 100).
+-define(NUM_MULTI_TESTS, 100).
+
+-define(N_DOMAIN, [0, 1, 2, 5]).
+-define(D_DOMAIN, [0, 1, 2, 5]).
+-define(R_DOMAIN, [2, 4]).
+
+-define(DELTA_N_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_D_DOMAIN, [0, 1, 2, 5]).
+-define(DELTA_R_DOMAIN, [1, 2, 4]).
+
+
+generate_sets() ->
+    permute(?N_DOMAIN, ?D_DOMAIN, ?R_DOMAIN, fun(N, D, R) ->
+        % We can't have more duplicates than total keys
+        case D > N of
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Only include one of the repeat values
+        % for our zero sets
+        case D == 0 of
+            true when R == 2 -> ok;
+            true -> throw(skip);
+            false -> ok
+        end,
+
+        % Replace R with a sentinel value for sanity
+        % when there are no dupes to have repeats
+        ActualR = if D == 0 -> 0; true -> R end,
+
+        {N, D, ActualR}
+    end).
+
+
+generate_transitions() ->
+    Sets = generate_sets(),
+    Pairs = [{Set1, Set2} || Set1 <- Sets, Set2 <- Sets],
+    lists:flatmap(fun({{N1, D1, _R1} = S1, {N2, D2, _R2} = S2}) ->
+        Filter = fun(DeltaN, DeltaD, DeltaR) ->
+            % Can't share more keys than the smaller of the
+            % two sets
+            case DeltaN > min(N1, N2) of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all combinations of DeltaD and
+            % DeltaR are equivalent tests
+            case DeltaN == 0 of
+                true when DeltaD == 0, DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % Can't share more dupes than exist in either set
+            % or the total number of shared keys
+            case DeltaD > min(D1, D2) orelse DeltaD > DeltaN of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % For DeltaD == 0, all DeltaR correspond to the
+            % same test so only include one instance
+            case DeltaD == 0 of
+                true when DeltaR == 1 -> ok;
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            % If we have more non-repeated keys in our
+            % transition than there's "room" for in the target
+            % set it isn't a valid test case.
+            TransitionNonRepeats = DeltaN - DeltaD,
+            TargetNonRepeats = N2 - D2,
+            case TransitionNonRepeats > TargetNonRepeats of
+                true -> throw(skip);
+                false -> ok
+            end,
+
+            {S1, S2, {DeltaN, DeltaD, DeltaR}}
+        end,
+        permute(?DELTA_N_DOMAIN, ?DELTA_D_DOMAIN, ?DELTA_R_DOMAIN, Filter)
+    end, Pairs).
+
+
+permute(NList, DList, RList, Filter) ->
+    % Technically we could call into Filter in each
+    % outer loops to conditionally skip inner loops.
+    % If someone comes along looking to speed up the
+    % fixture setup time, this would likely be an
+    % easy win.
+    lists:foldl(fun(N, NAcc) ->
+        lists:foldl(fun(D, DAcc) ->
+            lists:foldl(fun(R, RAcc) ->
+                try
+                    [Filter(N, D, R) | RAcc]
+                catch throw:skip ->
+                    RAcc
+                end
+            end, DAcc, RList)
+        end, NAcc, DList)
+    end, [], NList).
+
+
+row_transition_test_() ->
+    {
+        "Test view size tracking",
+        {
+            setup,
+            fun setup/0,
+            fun cleanup/1,
+            fun create_transition_tests/1
+        }
+    }.
+
+
+setup() ->
+    Ctx = test_util:start_couch([
+            fabric,
+            couch_jobs,
+            couch_js,
+            couch_views
+        ]),
+    {ok, Db} = fabric2_db:create(?tempdb(), [{user_ctx, ?ADMIN_USER}]),
+    {Ctx, Db}.
+
+
+cleanup({Ctx, Db}) ->
+    ok = fabric2_db:delete(fabric2_db:name(Db), []),
+    test_util:stop_couch(Ctx).
+
+
+create_transition_tests({_Ctx, Db}) ->
+    Transitions = generate_transitions(),
+    Single = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("single ~s", [tname(T)])),
+        [{Name, fun() -> check_single_transition(Db, T) end}]
+    end, lists:sort(Transitions)),
+    Multi = lists:flatmap(fun(T) ->
+        Name = lists:flatten(io_lib:format("multi ~s", [tname(T)])),
+        [{Name, fun() -> check_multi_transition(Db, T) end}]
+    end, lists:sort(group(shuffle(Transitions)))),
+    subset(?NUM_SINGLE_TESTS, Single) ++ subset(?NUM_MULTI_TESTS, Multi).
+
+
+check_single_transition(Db, {Set1, Set2, Transition}) ->
+    clear_views(Db),
+    InitKVs = init_set(Set1, [a, b, c, d, e]),
+    CommonKVs = reduce_set(Transition, InitKVs),
+    FinalKVs = fill_set(Set2, CommonKVs, [v, w, x, y, z]),
+    {InitJSONKVs, Bindings} = unlabel(InitKVs, #{}),
+    {FinalJSONKVs, _} = unlabel(FinalKVs, Bindings),
+
+    Sig = couch_uuids:random(),
+    DocId = couch_uuids:random(),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
+        write_docs(TxDb, Sig, [make_doc(DocId, InitJSONKVs)])
+    end),
+
+    fabric2_fdb:transactional(Db, fun(TxDb) ->
 
 Review comment:
   To put that another way, I wanted to avoid all of the iterating over changes 
and calling out to JavaScript so that I was just testing this particular bit of 
code. But I didn't want to remove the way that its actually executed when its 
part of the larger indexer system.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


With regards,
Apache Git Services

Reply via email to