This is an automated email from the ASF dual-hosted git repository.

jiahuili430 pushed a commit to branch log-rep-domain
in repository https://gitbox.apache.org/repos/asf/couchdb.git

commit 7e1a1be69a8079183d81b1b1644083a35ee8f2d2
Author: Jiahui Li <[email protected]>
AuthorDate: Mon Feb 23 15:11:17 2026 -0600

    Add optional logging when using disallowed domains for replication
---
 rel/overlay/etc/default.ini                        | 10 +++
 .../src/couch_replicator_scheduler.erl             |  3 +-
 .../src/couch_replicator_utils.erl                 | 93 ++++++++++++++++++++++
 3 files changed, 105 insertions(+), 1 deletion(-)

diff --git a/rel/overlay/etc/default.ini b/rel/overlay/etc/default.ini
index 6fbc99f58..922588938 100644
--- a/rel/overlay/etc/default.ini
+++ b/rel/overlay/etc/default.ini
@@ -715,6 +715,16 @@ partitioned||* = true
 ; options may be found at https://www.erlang.org/doc/man/inet.html#setopts-2.
 ;valid_socket_options = buffer,keepalive,nodelay,priority,recbuf,sndbuf
 
+; Valid replication endpoint domains.
+;  - []: Do not check replication endpoint domains.
+;  - ["domain1", "domain2", ...]: When `verify_endpoint_domain_log` is
+;    enabled, log a warning for every replication source or target URL
+;    whose host is not in this list. Jobs are not blocked.
+;valid_endpoint_domain = []
+
+; When enabled, CouchDB logs any replication that uses a disallowed domain.
+;verify_endpoint_domain_log = false
+
 ; Valid replication endpoint protocols. Replication jobs with endpoint urls not
 ; in this list will fail to run.
 ;valid_endpoint_protocols = http,https
diff --git a/src/couch_replicator/src/couch_replicator_scheduler.erl b/src/couch_replicator/src/couch_replicator_scheduler.erl
index 9eb0b4723..ca9738b88 100644
--- a/src/couch_replicator/src/couch_replicator_scheduler.erl
+++ b/src/couch_replicator/src/couch_replicator_scheduler.erl
@@ -665,7 +665,8 @@ maybe_remove_job_int(JobId, State) ->
 
 start_job_int(#job{pid = Pid}, _State) when Pid /= undefined ->
     ok;
-start_job_int(#job{} = Job0, State) ->
+start_job_int(#job{rep = Rep} = Job0, State) ->
+    ok = couch_replicator_utils:verify_endpoint_domain_log(Rep),
     Job = maybe_optimize_job_for_rate_limiting(Job0),
     case couch_replicator_scheduler_job:start_link(Job#job.rep) of
         {ok, Child} ->
diff --git a/src/couch_replicator/src/couch_replicator_utils.erl b/src/couch_replicator/src/couch_replicator_utils.erl
index 5e8187200..c924d9512 100644
--- a/src/couch_replicator/src/couch_replicator_utils.erl
+++ b/src/couch_replicator/src/couch_replicator_utils.erl
@@ -29,6 +29,7 @@
     remove_basic_auth_creds/1,
     normalize_basic_auth/1,
     seq_encode/1,
+    verify_endpoint_domain_log/1,
     valid_endpoint_protocols_log/1,
     verify_ssl_certificates_log/1,
     cacert_get/0
@@ -297,6 +298,40 @@ seq_encode(Seq) ->
     % object. We are being maximally compatible here.
     ?JSON_ENCODE(Seq).
 
+%% Log uses of disallowed domains
+verify_endpoint_domain_log(#rep{source = undefined, target = undefined}) ->
+    % When we cancel continuous transient replications (with a POST to _replicate)
+    % source and target will be undefined
+    ok;
+verify_endpoint_domain_log(#rep{} = Rep) ->
+    VerifyEnabled = config:get_boolean("replicator", 
"verify_endpoint_domain_log", false),
+    case VerifyEnabled of
+        true ->
+            AllowedDomainCfg = config:get("replicator", 
"valid_endpoint_domain", "[]"),
+            {ok, AllowedDomain} = couch_util:parse_term(AllowedDomainCfg),
+            ok = check_endpoint_domain(Rep, source, AllowedDomain),
+            ok = check_endpoint_domain(Rep, target, AllowedDomain);
+        false ->
+            ok
+    end.
+
+check_endpoint_domain(#rep{}, _, []) ->
+    ok;
+check_endpoint_domain(#rep{} = Rep, Type, AllowedDomain) ->
+    Url = url_from_type(Rep, Type),
+    #url{host = Host} = ibrowse_lib:parse_url(Url),
+    case lists:member(Host, AllowedDomain) of
+        true ->
+            ok;
+        false ->
+            couch_log:warning(
+                "**disallowed domain** replication ~s used disallowed domain 
~s at ~s", [
+                    rep_principal(Rep), Type, Url
+                ]
+            ),
+            ok
+    end.
+
 %% Log uses of http protocol
 valid_endpoint_protocols_log(#rep{source = undefined, target = undefined}) ->
     % When we cancel continuous transient replications (with a POST to _replicate)
@@ -811,6 +846,64 @@ t_allow_canceling_transient_jobs(_) ->
     ?assertEqual(ok, valid_endpoint_protocols_log(#rep{})),
     ?assertEqual(0, meck:num_calls(couch_log, warning, 2)).
 
+verify_endpoint_domain_log_setup() ->
+    Ctx = test_util:start_couch(),
+    config:set_boolean("replicator", "verify_endpoint_domain_log", true, 
false),
+    meck:new(couch_log, [passthrough]),
+    Ctx.
+
+verify_endpoint_domain_log_teardown(Ctx) ->
+    meck:unload(),
+    config:delete("replicator", "verify_endpoint_domain_log", false),
+    test_util:stop_couch(Ctx).
+
+verify_endpoint_domain_log_test_() ->
+    {
+        foreach,
+        fun verify_endpoint_domain_log_setup/0,
+        fun verify_endpoint_domain_log_teardown/1,
+        [
+            ?TDEF_FE(t_dont_warn_when_valid_endpoint_domain_is_empty),
+            ?TDEF_FE(t_warn_when_replicate_with_invalid_endpoint_domain)
+        ]
+    }.
+
+t_dont_warn_when_valid_endpoint_domain_is_empty(_) ->
+    set_allowed_domain("[]"),
+    Rep = #rep{
+        source = #httpdb{url = "https://foo.local"},
+        target = #httpdb{url = "https://127.0.0.2"}
+    },
+    meck:reset(couch_log),
+    ?assertEqual(ok, verify_endpoint_domain_log(Rep)),
+    ?assertEqual(0, meck:num_calls(couch_log, warning, 2)),
+    reset_allowed_domain().
+
+t_warn_when_replicate_with_invalid_endpoint_domain(_) ->
+    set_allowed_domain("[\"example.com\", \"127.0.0.1\"]"),
+    Rep1 = #rep{
+        source = #httpdb{url = "https://foo.local"},
+        target = #httpdb{url = "https://127.0.0.1"}
+    },
+    meck:reset(couch_log),
+    ?assertEqual(ok, verify_endpoint_domain_log(Rep1)),
+    ?assertEqual(1, meck:num_calls(couch_log, warning, 2)),
+
+    meck:reset(couch_log),
+    Rep2 = #rep{
+        source = #httpdb{url = "https://foo.local"},
+        target = #httpdb{url = "https://127.0.0.2"}
+    },
+    ?assertEqual(ok, verify_endpoint_domain_log(Rep2)),
+    ?assertEqual(2, meck:num_calls(couch_log, warning, 2)),
+    reset_allowed_domain().
+
+set_allowed_domain(Domains) ->
+    config:set("replicator", "valid_endpoint_domain", Domains, false).
+
+reset_allowed_domain() ->
+    config:delete("replicator", "valid_endpoint_domain", false).
+
 cacert_test() ->
     Old = ?CACERT_DEFAULT_TIMESTAMP,
     Now = erlang:monotonic_time(second),

Reply via email to