jaydoane commented on code in PR #4003:
URL: https://github.com/apache/couchdb/pull/4003#discussion_r861216302


##########
src/mem3/test/eunit/mem3_reshard_test.erl:
##########
@@ -38,11 +38,13 @@ setup() ->
     PartProps = [{partitioned, true}, {hash, [couch_partition, hash, []]}],
     create_db(Db2, [{q, 1}, {n, 1}, {props, PartProps}]),
     config:set("reshard", "retry_interval_sec", "0", _Persist = false),
+    config:set("reshard", "index_retry_interval_sec", "0", _Persist = false),

Review Comment:
   The OTP 24 compiler complains:
   ```
   src/mem3/test/eunit/mem3_reshard_test.erl:41:60: Warning: variable 
'_Persist' is already bound. If you mean to ignore this value, use '_' or a 
different underscore-prefixed name
   src/mem3/test/eunit/mem3_reshard_test.erl:48:52: Warning: variable 
'_Persist' is already bound. If you mean to ignore this value, use '_' or a 
different underscore-prefixed name
   ```



##########
src/mem3/src/mem3_reshard_index.erl:
##########
@@ -108,46 +95,102 @@ dreyfus_indices(DbName, Doc) ->
 hastings_indices(DbName, Doc) ->
     try
         Indices = hastings_index:design_doc_to_indexes(Doc),
-        [{hastings, DbName, Index} || Index <- Indices]
+        [{?HASTINGS, DbName, Index} || Index <- Indices]
     catch
         Tag:Err ->
             Msg = "~p couldn't get hasting indices ~p ~p ~p:~p",
             couch_log:error(Msg, [?MODULE, DbName, Doc, Tag, Err]),
             []
     end.
 
-build_index({mrview, DbName, MRSt}) ->
+build_index({?MRVIEW, DbName, MRSt} = Ctx, Try) ->

Review Comment:
   This could be condensed somewhat by abstracting the await/retry boilerplate 
further:
   ```diff
   --- a/src/mem3/src/mem3_reshard_index.erl
   +++ b/src/mem3/src/mem3_reshard_index.erl
   @@ -103,42 +103,36 @@ hastings_indices(DbName, Doc) ->
                []
        end.
    
   -build_index({?MRVIEW, DbName, MRSt} = Ctx, Try) ->
   -    case couch_index_server:get_index(couch_mrview_index, MRSt) of
   -        {ok, Pid} ->
   -            try
   -                case couch_index:get_state(Pid, get_update_seq(DbName)) of
   -                    {ok, _} -> ok;
   -                    Error -> maybe_retry(Ctx, Error, Try)
   -                end
   -            catch
   -                _:CatchError ->
   -                    maybe_retry(Ctx, CatchError, Try)
   -            end;
   -        OpenError ->
   -            maybe_retry(Ctx, OpenError, Try)
   -    end;
   +build_index({?MRVIEW, _DbName, MRSt} = Ctx, Try) ->
   +    await_retry(
   +        couch_index_server:get_index(couch_mrview_index, MRSt),
   +        fun couch_index:get_state/2,
   +        Ctx,
   +        Try
   +    );
    build_index({?DREYFUS, DbName, DIndex} = Ctx, Try) ->
   -    case dreyfus_index_manager:get_index(DbName, DIndex) of
   -        {ok, Pid} ->
   -            try
   -                case dreyfus_index:await(Pid, get_update_seq(DbName)) of
   -                    {ok, _, _} -> ok;
   -                    Error -> maybe_retry(Ctx, Error, Try)
   -                end
   -            catch
   -                _:CatchError ->
   -                    maybe_retry(Ctx, CatchError, Try)
   -            end;
   -        OpenError ->
   -            maybe_retry(Ctx, OpenError, Try)
   -    end;
   +    await_retry(
   +        dreyfus_index_manager:get_index(DbName, DIndex),
   +        fun dreyfus_index:await/2,
   +        Ctx,
   +        Try
   +    );
    build_index({?HASTINGS, DbName, HIndex} = Ctx, Try) ->
   -    case hastings_index_manager:get_index(DbName, HIndex) of
   +    await_retry(
   +        hastings_index_manager:get_index(DbName, HIndex),
   +        fun hastings_index:await/2,
   +        Ctx,
   +        Try
   +    ).
   +
   +await_retry(GetIndex, AwaitIndex, Ctx, Try) ->
   +    case GetIndex of
            {ok, Pid} ->
   +            {_, DbName, _} = Ctx,
                try
   -                case hastings_index:await(Pid, get_update_seq(DbName)) of
   +                case AwaitIndex(Pid, get_update_seq(DbName)) of
                        {ok, _} -> ok;
   +                    {ok, _, _} -> ok;
                        Error -> maybe_retry(Ctx, Error, Try)
                    end
                catch
   ```
   but I'm not convinced it's worth it, other than an exercise for me to see 
the exact differences in the three cases, which are unfortunate.



##########
src/mem3/src/mem3_reshard_index.erl:
##########
@@ -108,46 +95,102 @@ dreyfus_indices(DbName, Doc) ->
 hastings_indices(DbName, Doc) ->
     try
         Indices = hastings_index:design_doc_to_indexes(Doc),
-        [{hastings, DbName, Index} || Index <- Indices]
+        [{?HASTINGS, DbName, Index} || Index <- Indices]
     catch
         Tag:Err ->
             Msg = "~p couldn't get hasting indices ~p ~p ~p:~p",
             couch_log:error(Msg, [?MODULE, DbName, Doc, Tag, Err]),
             []
     end.
 
-build_index({mrview, DbName, MRSt}) ->
+build_index({?MRVIEW, DbName, MRSt} = Ctx, Try) ->
     case couch_index_server:get_index(couch_mrview_index, MRSt) of
         {ok, Pid} ->
-            Args = [Pid, get_update_seq(DbName)],
-            WPid = spawn_link(couch_index, get_state, Args),
-            {ok, WPid};
-        Error ->
-            Error
+            try
+                case couch_index:get_state(Pid, get_update_seq(DbName)) of
+                    {ok, _} -> ok;
+                    Error -> maybe_retry(Ctx, Error, Try)
+                end
+            catch
+                _:CatchError ->
+                    maybe_retry(Ctx, CatchError, Try)
+            end;
+        OpenError ->
+            maybe_retry(Ctx, OpenError, Try)
     end;
-build_index({dreyfus, DbName, Index}) ->
-    case dreyfus_index_manager:get_index(DbName, Index) of
+build_index({?DREYFUS, DbName, DIndex} = Ctx, Try) ->
+    case dreyfus_index_manager:get_index(DbName, DIndex) of
         {ok, Pid} ->
-            Args = [Pid, get_update_seq(DbName)],
-            WPid = spawn_link(dreyfus_index, await, Args),
-            {ok, WPid};
-        Error ->
-            Error
+            try
+                case dreyfus_index:await(Pid, get_update_seq(DbName)) of
+                    {ok, _, _} -> ok;
+                    Error -> maybe_retry(Ctx, Error, Try)
+                end
+            catch
+                _:CatchError ->
+                    maybe_retry(Ctx, CatchError, Try)
+            end;
+        OpenError ->
+            maybe_retry(Ctx, OpenError, Try)
     end;
-build_index({hastings, DbName, Index}) ->
-    case hastings_index_manager:get_index(DbName, Index) of
+build_index({?HASTINGS, DbName, HIndex} = Ctx, Try) ->
+    case hastings_index_manager:get_index(DbName, HIndex) of
         {ok, Pid} ->
-            Args = [Pid, get_update_seq(DbName)],
-            WPid = spawn_link(hastings_index, await, Args),
-            {ok, WPid};
-        Error ->
-            Error
+            try
+                case hastings_index:await(Pid, get_update_seq(DbName)) of
+                    {ok, _} -> ok;
+                    Error -> maybe_retry(Ctx, Error, Try)
+                end
+            catch
+                _:CatchErorr ->
+                    maybe_retry(Ctx, CatchErorr, Try)
+            end;
+        OpenError ->
+            maybe_retry(Ctx, OpenError, Try)
     end.
 
+maybe_retry(Ctx, killed = Error, Try) ->
+    retry(Ctx, Error, Try);
+maybe_retry(Ctx, {killed, _} = Error, Try) ->
+    retry(Ctx, Error, Try);
+maybe_retry(Ctx, shutdown = Error, Try) ->
+    retry(Ctx, Error, Try);
+maybe_retry(Ctx, Error, 0) ->
+    fail(Ctx, Error);
+maybe_retry(Ctx, Error, Try) when is_integer(Try), Try > 0 ->
+    retry(Ctx, Error, Try - 1).
+
+retry(Ctx, Error, Try) ->
+    IndexInfo = index_info(Ctx),
+    LogMsg = "~p : error ~p when building ~p, retrying (~p)",
+    couch_log:warning(LogMsg, [?MODULE, Error, IndexInfo, Try]),
+    timer:sleep(retry_interval_sec() * 1000),
+    build_index(Ctx, Try).
+
+fail(Ctx, Error) ->
+    IndexInfo = index_info(Ctx),
+    LogMsg = "~p : error ~p when building ~p, max tries exceeded, failing",
+    couch_log:error(LogMsg, [?MODULE, Error, IndexInfo]),
+    exit({error_building_index, IndexInfo}).
+
+index_info({?MRVIEW, DbName, MRSt}) ->
+    GroupName = couch_mrview_index:get(idx_name, MRSt),
+    {DbName, GroupName};
+index_info({?DREYFUS, DbName, Index}) ->
+    {DbName, Index};
+index_info({?HASTINGS, DbName, Index}) ->
+    {DbName, Index}.
+
 has_app(App) ->
     code:lib_dir(App) /= {error, bad_name}.
 
 get_update_seq(DbName) ->
     couch_util:with_db(DbName, fun(Db) ->
         couch_db:get_update_seq(Db)
     end).
+
+max_retries() ->
+    config:get_integer("reshard", "index_max_retries", 5).
+
+retry_interval_sec() ->
+    config:get_integer("reshard", "index_retry_interval_sec", 10).

Review Comment:
   Should these defaults be added to `default.ini`?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to