This is an automated email from the ASF dual-hosted git repository.

vatamane pushed a commit to branch qjs
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 73315580a2d48349863b48383417a1837485a46a
Author: Nick Vatamaniuc <vatam...@gmail.com>
AuthorDate: Wed Aug 9 17:04:15 2023 -0400

    [wip] add a couch_scanner app
    
    couch_scanner will run in the background and scan all the dbs and ddocs,
    first trying to compile them against quickjs and the existing sm engine,
    then possibly even running a sample of docs through the map/reduce
    functions and comparing the results.
    
    since there could be a large number of dbs and scanning would happen at a
    low background rate, there is a periodic checkpointing mechanism to save
    the currently processed db shard checkpoint.
---
 rebar.config.script                                |   1 +
 rel/reltool.config                                 |   2 +
 src/couch_scanner/README.md                        |   4 +
 src/couch_scanner/src/couch_scanner.app.src        |  29 ++++
 src/couch_scanner/src/couch_scanner.erl            |  28 ++++
 src/couch_scanner/src/couch_scanner_app.erl        |  23 +++
 src/couch_scanner/src/couch_scanner_checkpoint.erl |  89 ++++++++++++
 src/couch_scanner/src/couch_scanner_server.erl     | 154 +++++++++++++++++++++
 src/couch_scanner/src/couch_scanner_sup.erl        |  34 +++++
 .../test/eunit/couch_scanner_test.erl              |  34 +++++
 10 files changed, 398 insertions(+)

diff --git a/rebar.config.script b/rebar.config.script
index af7565511..545bbf2cc 100644
--- a/rebar.config.script
+++ b/rebar.config.script
@@ -142,6 +142,7 @@ SubDirs = [
     "src/smoosh",
     "src/weatherreport",
     "src/couch_prometheus",
+    "src/couch_scanner",
     "rel"
 ].
 
diff --git a/rel/reltool.config b/rel/reltool.config
index cac5f0c4d..98f44f461 100644
--- a/rel/reltool.config
+++ b/rel/reltool.config
@@ -64,6 +64,7 @@
         snappy,
         weatherreport,
         couch_prometheus,
+        couch_scanner,
 
         %% extra
         nouveau,
@@ -128,6 +129,7 @@
     {app, snappy, [{incl_cond, include}]},
     {app, weatherreport, [{incl_cond, include}]},
     {app, couch_prometheus, [{incl_cond, include}]},
+    {app, couch_scanner, [{incl_cond, include}]},
 
     %% extra
     {app, nouveau, [{incl_cond, include}]},
diff --git a/src/couch_scanner/README.md b/src/couch_scanner/README.md
new file mode 100644
index 000000000..9b8bf66b7
--- /dev/null
+++ b/src/couch_scanner/README.md
@@ -0,0 +1,4 @@
+Couch Scanner
+================
+
+Traverse all dbs periodically and emit various reports
diff --git a/src/couch_scanner/src/couch_scanner.app.src b/src/couch_scanner/src/couch_scanner.app.src
new file mode 100644
index 000000000..961e9e80e
--- /dev/null
+++ b/src/couch_scanner/src/couch_scanner.app.src
@@ -0,0 +1,29 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+{application, couch_scanner, [
+    {description, "CouchDB Scanner"},
+    {vsn, git},
+    {registered, [
+        couch_scanner_server % registered locally by couch_scanner_server:start_link/0
+    ]},
+    {applications, [
+        kernel,
+        stdlib,
+        crypto,
+        config,
+        couch_log,
+        couch_stats,
+        fabric
+    ]},
+    {mod, {couch_scanner_app, []}} % application callback module; starts couch_scanner_sup
+]}.
diff --git a/src/couch_scanner/src/couch_scanner.erl b/src/couch_scanner/src/couch_scanner.erl
new file mode 100644
index 000000000..1e7741d42
--- /dev/null
+++ b/src/couch_scanner/src/couch_scanner.erl
@@ -0,0 +1,28 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_scanner).
+
+-export([
+    enable/0,
+    disable/0,
+    status/0
+]).
+
+enable() -> % Public API: delegates to couch_scanner_server:enable/0.
+    couch_scanner_server:enable().
+
+disable() -> % Public API: delegates to couch_scanner_server:disable/0.
+    couch_scanner_server:disable().
+
+status() -> % Public API: returns whatever couch_scanner_server:status/0 replies with.
+    couch_scanner_server:status().
diff --git a/src/couch_scanner/src/couch_scanner_app.erl b/src/couch_scanner/src/couch_scanner_app.erl
new file mode 100644
index 000000000..23c4093dc
--- /dev/null
+++ b/src/couch_scanner/src/couch_scanner_app.erl
@@ -0,0 +1,23 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_scanner_app).
+
+-behaviour(application).
+
+-export([start/2, stop/1]).
+
+start(_StartType, _StartArgs) -> % application callback: start the top-level supervisor; both args are ignored
+    couch_scanner_sup:start_link().
+
+stop(_State) -> % application callback: nothing to clean up here
+    ok.
diff --git a/src/couch_scanner/src/couch_scanner_checkpoint.erl b/src/couch_scanner/src/couch_scanner_checkpoint.erl
new file mode 100644
index 000000000..9b5e75646
--- /dev/null
+++ b/src/couch_scanner/src/couch_scanner_checkpoint.erl
@@ -0,0 +1,89 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_scanner_checkpoint).
+
+-export([
+    write/1,
+    read/0,
+    reset/0
+]).
+
+-include_lib("couch/include/couch_db.hrl").
+
+-define(DOC_ID, <<?LOCAL_DOC_PREFIX, "scanner-checkpoint">>).
+
+% Public API
+
+write(#{} = State) -> % Store State (a map) as the checkpoint doc; returns ok without writing when persistence is off
+    case enabled() of
+        false -> ok;
+        true -> with_db(fun(Db) -> update_doc(Db, ?DOC_ID, State) end)
+    end.
+
+read() -> % Returns the saved checkpoint as a map, or not_found when none is stored or persistence is off
+    case enabled() of
+        true -> with_db(fun(Db) -> load_doc(Db, ?DOC_ID) end);
+        false -> not_found % previously ok, which is not a value a reader can interpret as "no checkpoint"
+    end.
+
+reset() -> % Delete the checkpoint doc: ok | not_found when persistence is on, ok without touching the db otherwise
+    case enabled() of
+        false -> ok;
+        true -> with_db(fun(Db) -> delete_doc(Db, ?DOC_ID) end)
+    end.
+
+% Private functions
+
+delete_doc(Db, DocId) -> % Returns ok once DocId is deleted, or not_found when it cannot be opened
+    case couch_db:open_doc(Db, DocId, []) of
+        {ok, #doc{revs = {_, RevList}}} ->
+            {ok, _} = couch_db:delete_doc(Db, DocId, RevList), % NOTE(review): confirm delete_doc/3 accepts this rev id list form
+            ok;
+        {not_found, _} ->
+            not_found
+    end.
+
+update_doc(Db, DocId, #{} = Body) -> % Create or overwrite DocId with Body; always returns ok (crashes on a failed update)
+    WithId = Body#{<<"_id">> => DocId},
+    NewDoc0 = couch_doc:from_json_obj(?JSON_DECODE(?JSON_ENCODE(WithId))), % encode/decode round trip before building the #doc{}
+    NewDoc =
+        case couch_db:open_doc(Db, DocId, []) of
+            {ok, #doc{revs = Revs}} -> NewDoc0#doc{revs = Revs}; % reuse the stored revs when the doc already exists
+            {not_found, _} -> NewDoc0
+        end,
+    {ok, _} = couch_db:update_doc(Db, NewDoc, []),
+    ok.
+
+load_doc(Db, DocId) -> % Returns the decoded body of DocId, or not_found when it cannot be opened
+    case couch_db:open_doc(Db, DocId, [ejson_body]) of
+        {ok, #doc{body = EJsonBody}} ->
+            ?JSON_DECODE(?JSON_ENCODE(EJsonBody), [return_maps]); % NOTE(review): two-argument ?JSON_DECODE - confirm couch_db.hrl defines that arity
+        {not_found, _} ->
+            not_found
+    end.
+
+with_db(Fun) -> % Run Fun(Db) against the db named by the mem3/shards_db setting and return its result
+    DbName = config:get("mem3", "shards_db", "_dbs"), % defaults to "_dbs"
+    case mem3_util:ensure_exists(DbName) of
+        {ok, Db} ->
+            try
+                Fun(Db)
+            after
+                catch couch_db:close(Db) % always close; any failure while closing is ignored
+            end;
+        Else ->
+            throw(Else) % anything other than {ok, Db} is thrown to the caller
+    end.
+
+enabled() -> % true when the couch_scanner/persist config flag is set; defaults to false
+    config:get_boolean("couch_scanner", "persist", false).
diff --git a/src/couch_scanner/src/couch_scanner_server.erl b/src/couch_scanner/src/couch_scanner_server.erl
new file mode 100644
index 000000000..da9cc1a18
--- /dev/null
+++ b/src/couch_scanner/src/couch_scanner_server.erl
@@ -0,0 +1,154 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_scanner_server).
+
+%-include_lib("couch/include/couch_db.hrl").
+
+-export([
+    start_link/0,
+    enable/0,
+    disable/0,
+    status/0
+]).
+
+-export([
+    init/1,
+    handle_continue/2,
+    handle_call/3,
+    handle_cast/2,
+    handle_info/2
+]).
+
+-export([
+    handle_config_change/5,
+    handle_config_terminate/3
+]).
+
+-define(CHECKPOINT_INTERVAL_MSEC, 180000).
+-define(CONFIG_RELISTEN_MSEC, 5000).
+
+-record(st, {
+    enabled, % boolean; read from the couch_scanner/enabled config flag in init/1
+    started, % 0 at init; set to erlang:system_time(second) when start/1 runs on an enabled state
+    db_cursor, % <<>> at init; copied into the checkpoint map by checkpoint/1
+    checkpoint_st, % last checkpoint map saved by checkpoint/1; #{} at init
+    cluster_state % {[node() | nodes()], mem3:nodes()} as captured in init/1
+}).
+
+start_link() -> % Start the server linked to the caller and registered locally under the module name
+    gen_server:start_link({local, ?MODULE}, ?MODULE, [], []).
+
+enable() -> % Synchronous request handled by handle_call(enable, ...); the server replies ok
+    gen_server:call(?MODULE, enable).
+
+disable() -> % Synchronous request handled by handle_call(disable, ...); the server replies ok
+    gen_server:call(?MODULE, disable).
+
+status() -> % Synchronous request; the server replies with its current #st{} record
+    gen_server:call(?MODULE, status).
+
+init(_Args) -> % gen_server callback: build the initial #st{} and hand off to handle_continue(unpersist, St)
+    process_flag(trap_exit, true),
+    ok = config:listen_for_changes(?MODULE, nil), % subscribe to config changes; handler state is nil
+    St = #st{
+        enabled = config:get_boolean("couch_scanner", "enabled", false),
+        started = 0,
+        db_cursor = <<>>,
+        checkpoint_st = #{},
+        cluster_state = {[node() | nodes()], mem3:nodes()}
+    },
+    {ok, St, {continue, unpersist}}.
+
+handle_continue(unpersist, #st{} = St) -> % runs right after init/1: load checkpoint, arm timer, maybe start
+    UnpersistState = couch_scanner_checkpoint:read(), % saved checkpoint, if any; only logged for now
+    couch_log:info("~p : unpersist state ~p", [?MODULE, UnpersistState]),
+    schedule_checkpoint(), % arm the first periodic checkpoint timer
+    St1 = start(St), % start/1 leaves a disabled state unchanged
+    {noreply, St1}.
+
+handle_call(enable, _From, #st{enabled = true} = St) -> % already enabled: reply ok, state unchanged
+    {reply, ok, St};
+handle_call(enable, _From, #st{enabled = false} = St) ->
+    couch_log:info("~p : enable", [?MODULE]),
+    {reply, ok, start(St#st{enabled = true})}; % start/1 only acts on an enabled state, so set the flag first
+handle_call(disable, _From, #st{enabled = false} = St) -> % already disabled: reply ok, state unchanged
+    {reply, ok, St};
+handle_call(disable, _From, #st{enabled = true} = St) ->
+    couch_log:info("~p : disable", [?MODULE]),
+    {reply, ok, stop(St)}; % pass the still-enabled state so stop/1 reaches its teardown clause, which clears the flag
+handle_call(status, _From, #st{} = St) -> % reply with the whole state record
+    {reply, St, St}.
+
+handle_cast(Msg, #st{} = St) -> % no casts are expected: log the message and keep the state
+    couch_log:error("~p : unknown cast ~p", [?MODULE, Msg]),
+    {noreply, St}.
+
+handle_info(checkpoint, #st{} = St) -> % timer message armed by schedule_checkpoint/0
+    St1 = checkpoint(St),
+    schedule_checkpoint(), % re-arm the timer for the next interval
+    {noreply, St1};
+handle_info(restart_config, #st{} = St) -> % sent by handle_config_terminate/3 after the listener went away
+    ok = config:listen_for_changes(?MODULE, nil), % re-subscribe; crash if that does not return ok
+    {noreply, St};
+handle_info(Msg, St) -> % anything else is logged and dropped
+    couch_log:error("~p : unknown info message ~p", [?MODULE, Msg]),
+    {noreply, St}.
+
+handle_config_change("couch_scanner", "enabled", "true", _, S) -> % config listener callback: flag turned on
+    enable(),
+    {ok, S};
+handle_config_change("couch_scanner", "enabled", "false", _, S) -> % flag turned off
+    disable(),
+    {ok, S};
+handle_config_change(_, _, _, _, S) -> % unrelated setting: keep the handler state instead of resetting it to nil
+    {ok, S}.
+
+handle_config_terminate(_Server, stop, _State) -> % reason stop: nothing to do
+    ok;
+handle_config_terminate(_Server, _Reason, _State) -> % any other reason: message the server to re-listen after a delay
+    erlang:send_after(?CONFIG_RELISTEN_MSEC, whereis(?MODULE), restart_config).
+
+schedule_checkpoint() -> % Send a checkpoint message to the calling process after the checkpoint interval
+    erlang:send_after(?CHECKPOINT_INTERVAL_MSEC, self(), checkpoint).
+
+start(#st{enabled = false} = St) -> % disabled: return the state untouched
+    St;
+start(#st{enabled = true} = St) -> % the comments below list steps that are not implemented yet
+    % check config module
+    % check if resuming from state
+    % if stale reset
+    % spawn db folder
+    % spawn isolated runner for each module
+    St#st{started = erlang:system_time(second)}. % currently only records the start time in seconds
+
+stop(#st{enabled = false} = St) -> % already disabled: return the state untouched
+    St;
+stop(#st{enabled = true} = St) -> % the comments below list steps that are not implemented yet
+    % maybe checkpoint
+    % stop runners
+    % stop db folder
+    St#st{enabled = false}. % currently only clears the enabled flag
+
+checkpoint(#st{enabled = false} = St) -> % nothing is saved while disabled
+    St;
+checkpoint(#st{enabled = true} = St) -> % save the current cursor if it differs from the last saved checkpoint
+    #st{checkpoint_st = PrevCheckpointSt} = St,
+    #st{db_cursor = DbCursor} = St,
+    CheckpointSt = #{db_cursor => DbCursor},
+    case PrevCheckpointSt =:= CheckpointSt of
+        true -> % unchanged since the last save: skip the write
+            St;
+        false ->
+            ok = couch_scanner_checkpoint:write(CheckpointSt), % write/1 returns ok; any failure crashes the server
+            St#st{checkpoint_st = CheckpointSt}
+    end.
diff --git a/src/couch_scanner/src/couch_scanner_sup.erl b/src/couch_scanner/src/couch_scanner_sup.erl
new file mode 100644
index 000000000..7baf0037a
--- /dev/null
+++ b/src/couch_scanner/src/couch_scanner_sup.erl
@@ -0,0 +1,34 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_scanner_sup).
+
+-behaviour(supervisor).
+
+-export([
+    start_link/0,
+    init/1
+]).
+
+start_link() -> % Start the supervisor linked to the caller and registered locally under the module name
+    supervisor:start_link({local, ?MODULE}, ?MODULE, []).
+
+init([]) -> % supervisor callback: one child (the scanner server) under rest_for_one
+    SupFlags = #{
+        strategy => rest_for_one, intensity => 25, period => 1
+    },
+    Server = #{
+        id => couch_scanner_server,
+        start => {couch_scanner_server, start_link, []},
+        shutdown => 5000
+    },
+    {ok, {SupFlags, [Server]}}.
diff --git a/src/couch_scanner/test/eunit/couch_scanner_test.erl b/src/couch_scanner/test/eunit/couch_scanner_test.erl
new file mode 100644
index 000000000..6b4ecd99d
--- /dev/null
+++ b/src/couch_scanner/test/eunit/couch_scanner_test.erl
@@ -0,0 +1,34 @@
+% Licensed under the Apache License, Version 2.0 (the "License"); you may not
+% use this file except in compliance with the License. You may obtain a copy of
+% the License at
+%
+%   http://www.apache.org/licenses/LICENSE-2.0
+%
+% Unless required by applicable law or agreed to in writing, software
+% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+% License for the specific language governing permissions and limitations under
+% the License.
+
+-module(couch_scanner_test).
+
+-include_lib("couch/include/couch_eunit.hrl").
+
+couch_scanner_test_() -> % EUnit generator: one setup/teardown pair around the listed test cases
+    {
+        setup,
+        fun start_couch/0,
+        fun stop_couch/1,
+        with([
+            ?TDEF(t_scanner)
+        ])
+    }.
+
+start_couch() -> % setup fixture: its return value is the context handed to stop_couch/1, so it must come from start_couch
+    test_util:start_couch().
+
+stop_couch(Ctx) -> % teardown fixture: Ctx is the value returned by the setup fun
+    test_util:stop_couch(Ctx).
+
+t_scanner(_Ctx) -> % placeholder test case: asserts nothing yet
+    ok.

Reply via email to