iilyak commented on a change in pull request #3766:
URL: https://github.com/apache/couchdb/pull/3766#discussion_r773319558



##########
File path: src/smoosh/src/smoosh_channel.erl
##########
@@ -194,21 +215,128 @@ handle_info(check_window, State0) ->
         end,
     erlang:send_after(60 * 1000, self(), check_window),
     {noreply, FinalState};
+handle_info(persist_queue, State0) ->
+    #state{waiting = Queue} = State0,
+    write_state_to_file(State0),
+    smoosh_priority_queue:write_to_file(Queue),
+    Checkpoint =
+        config:get_integer(
+            "smoosh", "state_checkpoint_interval_in_sec", 
?DEFAULT_CHECKPOINT_INTERVAL_IN_SEC
+        ) * 1000,
+    erlang:send_after(Checkpoint, self(), persist_queue),
+    {noreply, State0};
 handle_info(pause, State0) ->
-    {ok, State} = code_change(nil, State0, nil),
+    State = maybe_open_queue(State0),
     {noreply, State#state{paused = true}};
 handle_info(unpause, State0) ->
-    {ok, State} = code_change(nil, State0, nil),
+    State = maybe_open_queue(State0),
     {noreply, maybe_start_compaction(State#state{paused = false})}.
 
-terminate(_Reason, _State) ->
+terminate(_Reason, #state{name = Name, waiting = Q}) ->
+    file:delete(active_file_name(Name)),
+    file:delete(starting_file_name(Name)),
+    if
+        Q =/= nil ->
+            smoosh_priority_queue:close(Q);
+        true ->
+            nil
+    end,
     ok.
 
-code_change(_OldVsn, #state{} = State, _Extra) ->
-    {ok, State}.
+maybe_recover_state(#state{name = Name} = State) ->
+    Active = recover(active_file_name(Name)),
+    Starting = recover(starting_file_name(Name)),
+    DatabaseDir = config:get("couchdb", "database_dir"),
+    ViewDir = config:get("couchdb", "view_index_dir"),
+    Active1 = get_matching_compact_files(DatabaseDir, Active),
+    Active2 = get_matching_compact_files(ViewDir, Active),
+    Active3 = Active1 ++ Active2,
+    State#state{active = Active3, starting = Starting}.
+
+get_matching_compact_files(Dir, Active) ->
+    MatchingFiles = filelib:fold_files(
+        Dir,
+        "^[a-zA-Z0-9_.-]*.compact$",
+        true,
+        (fun(FilePath, Acc) ->
+            FilePrefix = filename:rootname(FilePath, ".compact"),
+            case lists:keyfind(FilePrefix, 1, Active) of

Review comment:
       This is going to be slow. The `lists:keyfind/3` takes `O(n)` to get the 
value. We repeat the `lists:keyfind/3` for each file candidate. IRC `Active` 
list is a term of the following structure `[{FilePath, Pid}]`. In this case the 
following would work better (not tested pseudo-code):
   
   ```
   maybe_recover_state(#state{name = Name} = State) ->
       Active = recover(active_file_name(Name)),
       ActiveMap = maps:from_list(Active),
       Active1 = get_matching_compact_files(DatabaseDir, ActiveMap),
       Active2 = get_matching_compact_files(ViewDir, ActiveMap),
       ....
   
   get_matching_compact_files(Dir, ActiveMap) ->
       MatchingFilesMap = filelib:fold_files(
           Dir,
           "^[a-zA-Z0-9_.-]*.compact$",
           true,
           fun(FilePath, Acc) ->
               FilePrefix = filename:rootname(FilePath, ".compact"),
               maps:remove(FilePrefix, Acc)
           end, ActiveMap),
        maps:to_list(MatchingFilesMap).
   ```
   
   With this version the delete from map is O(log(n)) and the n is getting 
smaller on each iteration. This function is probably the only one in the whole 
module which need to be scalable. Because on big clusters there might a lot of 
files in the state term and even more on the filesystem.
   
   




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to