At a first glance, change tracking should never be allowed for write-only columns. However, some clients (e.g., ovn-northd) that are mostly exclusive writers of a database, use change tracking to avoid duplicating the IDL row records into a local cache when implementing incremental processing.
The default behavior of the IDL is to automatically turn a write-only column into a read-write column whenever the client enables change tracking for that column. For the afore mentioned clients, this becomes a performance issue. Commit 1cc618c32524 ("ovsdb-idl: Fix atomicity of writes that don't change a column's value.") explains why writes that don't change a column's value cannot be optimized out early if the column is read/write. Furthermore, if there is at least one record in any table that changed during a transaction, then *all* records that have been written are added to the transaction, even if their values didn't change. If there are many such rows (e.g., like in ovn-northd's case) this incurs a significant overhead because: a. the client has to build this large transaction b. the transaction has to be sent over the network c. the server needs to parse this (mostly) no-op update We now introduce a new IDL API allowing users to explicitly request change tracking of a column *without* forcing the column mode to read-write. We benchmarked ovn-northd performance when using this new mode against NB and SB databases taken from ovn-kubernetes scale tests. We noticed that when a minor change is performed to the Northbound database (e.g., NB_Global.nb_cfg is incremented) the time it takes to build the Southbound transaction becomes negligible (vs ~1.5 seconds before this change). End-to-end ovn-kubernetes scale tests on 120-node clusters also show significant reduction of latency to bring up pods; both average and P99 latency decreased by ~30%. Signed-off-by: Dumitru Ceara <dce...@redhat.com> --- Note: The OVN counter part change is: https://github.com/dceara/ovn/commit/c051e774120967e7046031098410dd521ee430b7 --- NEWS | 2 ++ lib/ovsdb-idl.c | 62 ++++++++++++++++++++++++++++++++++++---------- lib/ovsdb-idl.h | 1 + tests/ovsdb-idl.at | 20 ++++++++++++--- tests/test-ovsdb.c | 21 ++++++++++++---- 5 files changed, 85 insertions(+), 21 deletions(-) diff --git a/NEWS b/NEWS index 8fa57836a928..2e87d1735aa2 100644 --- a/NEWS +++ b/NEWS @@ -3,6 +3,8 @@ Post-v2.17.0 - OVSDB: * 'relay' service model now supports transaction history, i.e. honors the 'last-txn-id' field in 'monitor_cond_since' requests from clients. + - OVSDB-IDL: + * New interface for enabling change tracking of write-only columns. v2.17.0 - 17 Feb 2022 diff --git a/lib/ovsdb-idl.c b/lib/ovsdb-idl.c index 882ede75598d..6898d0911602 100644 --- a/lib/ovsdb-idl.c +++ b/lib/ovsdb-idl.c @@ -93,6 +93,8 @@ struct ovsdb_idl { struct ovsdb_idl_txn *txn; struct hmap outstanding_txns; bool verify_write_only; + bool track_changes_noalert; /* If true then the IDL allows change tracking + * of write-only columns. */ struct ovs_list deleted_untracked_rows; /* Stores rows deleted in the * current run, that are not yet * added to the track_list. */ @@ -264,6 +266,7 @@ ovsdb_idl_create_unconnected(const struct ovsdb_idl_class *class, .txn = NULL, .outstanding_txns = HMAP_INITIALIZER(&idl->outstanding_txns), .verify_write_only = false, + .track_changes_noalert = false, .deleted_untracked_rows = OVS_LIST_INITIALIZER(&idl->deleted_untracked_rows), .rows_to_reparse @@ -586,6 +589,19 @@ ovsdb_idl_verify_write_only(struct ovsdb_idl *idl) idl->verify_write_only = true; } +/* In regular operation mode, IDL change tracking of a column implies + * that the column is read-write (OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT set). + * This comes at a cost as no-op updates to read-write columns cannot be + * easily optimized out (see ovsdb_idl_txn_write()). This function allows + * users to enable a mode in which change tracking is allowed for write-only + * columns. + */ +void +ovsdb_idl_track_changes_noalert(struct ovsdb_idl *idl) +{ + idl->track_changes_noalert = true; +} + /* Returns true if 'idl' is currently connected or trying to connect * and a negative response to a schema request has not been received */ bool @@ -889,6 +905,16 @@ add_ref_table(struct ovsdb_idl *idl, const struct ovsdb_base_type *base) } } +static void +ovsdb_idl_add_column__(struct ovsdb_idl *idl, + const struct ovsdb_idl_column *column, + unsigned char mode) +{ + ovsdb_idl_set_mode(idl, column, mode); + add_ref_table(idl, &column->type.key); + add_ref_table(idl, &column->type.value); +} + /* Turns on OVSDB_IDL_MONITOR and OVSDB_IDL_ALERT for 'column' in 'idl'. Also * ensures that any tables referenced by 'column' will be replicated, even if * no columns in that table are selected for replication (see @@ -902,9 +928,7 @@ void ovsdb_idl_add_column(struct ovsdb_idl *idl, const struct ovsdb_idl_column *column) { - ovsdb_idl_set_mode(idl, column, OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT); - add_ref_table(idl, &column->type.key); - add_ref_table(idl, &column->type.value); + ovsdb_idl_add_column__(idl, column, OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT); } /* Ensures that the table with class 'tc' will be replicated on 'idl' even if @@ -1236,15 +1260,25 @@ ovsdb_idl_row_get_seqno(const struct ovsdb_idl_row *row, * functions. * * This function should be called between ovsdb_idl_create() and - * the first call to ovsdb_idl_run(). The column to be tracked - * should have OVSDB_IDL_ALERT turned on. + * the first call to ovsdb_idl_run(). The column to be traked should + * have: + * a. OVSDB_IDL_ALERT turned on if 'track_changes_noalert' is 'false' + * OR + * b. OVSDB_IDL_MONITOR turned on if 'track_changes_noalert' is 'true' */ void ovsdb_idl_track_add_column(struct ovsdb_idl *idl, const struct ovsdb_idl_column *column) { - if (!(*ovsdb_idl_get_mode(idl, column) & OVSDB_IDL_ALERT)) { - ovsdb_idl_add_column(idl, column); + if (idl->track_changes_noalert) { + if (!(*ovsdb_idl_get_mode(idl, column) & OVSDB_IDL_MONITOR)) { + ovsdb_idl_add_column__(idl, column, OVSDB_IDL_MONITOR); + } + } else { + if (!(*ovsdb_idl_get_mode(idl, column) & OVSDB_IDL_ALERT)) { + ovsdb_idl_add_column__(idl, column, + OVSDB_IDL_MONITOR | OVSDB_IDL_ALERT); + } } *ovsdb_idl_get_mode(idl, column) |= OVSDB_IDL_TRACK; } @@ -1694,11 +1728,13 @@ ovsdb_idl_row_change(struct ovsdb_idl_row *row, const struct shash *values, continue; } - if (datum_changed && table->modes[column_idx] & OVSDB_IDL_ALERT) { - changed = true; - row->change_seqno[change] - = row->table->change_seqno[change] - = row->table->idl->change_seqno + 1; + if (datum_changed) { + if (table->modes[column_idx] & OVSDB_IDL_ALERT) { + changed = true; + row->change_seqno[change] + = row->table->change_seqno[change] + = row->table->idl->change_seqno + 1; + } if (table->modes[column_idx] & OVSDB_IDL_TRACK) { if (ovs_list_is_empty(&row->track_node) && @@ -3501,7 +3537,7 @@ ovsdb_idl_txn_write__(const struct ovsdb_idl_row *row_, class = row->table->class_; column_idx = column - class->columns; - write_only = row->table->modes[column_idx] == OVSDB_IDL_MONITOR; + write_only = !(row->table->modes[column_idx] & OVSDB_IDL_ALERT); ovs_assert(row->new_datum != NULL); ovs_assert(column_idx < class->n_columns); diff --git a/lib/ovsdb-idl.h b/lib/ovsdb-idl.h index d00599616ef9..bcb576359b91 100644 --- a/lib/ovsdb-idl.h +++ b/lib/ovsdb-idl.h @@ -85,6 +85,7 @@ bool ovsdb_idl_has_ever_connected(const struct ovsdb_idl *); void ovsdb_idl_enable_reconnect(struct ovsdb_idl *); void ovsdb_idl_force_reconnect(struct ovsdb_idl *); void ovsdb_idl_verify_write_only(struct ovsdb_idl *); +void ovsdb_idl_track_changes_noalert(struct ovsdb_idl *); bool ovsdb_idl_is_alive(const struct ovsdb_idl *); bool ovsdb_idl_is_connected(const struct ovsdb_idl *idl); diff --git a/tests/ovsdb-idl.at b/tests/ovsdb-idl.at index 62e2b638320c..0436e0f77b0f 100644 --- a/tests/ovsdb-idl.at +++ b/tests/ovsdb-idl.at @@ -1154,12 +1154,25 @@ OVSDB_CHECK_IDL_WO_MONITOR_COND([simple idl disable monitor-cond], ]]) m4_define([OVSDB_CHECK_IDL_TRACK_C], - [AT_SETUP([$1 - C]) + [AT_SETUP([$1 - alert - C]) + AT_KEYWORDS([ovsdb server idl tracking positive $5]) + AT_CHECK([ovsdb_start_idltest]) + m4_if([$2], [], [], + [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])]) + AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 --change-track=alert idl unix:socket $3], + [0], [stdout], [ignore]) + AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]), + [0], [$4]) + OVSDB_SERVER_SHUTDOWN + AT_CLEANUP]) + +m4_define([OVSDB_CHECK_IDL_TRACK_NOALERT_C], + [AT_SETUP([$1 - noalert - C]) AT_KEYWORDS([ovsdb server idl tracking positive $5]) AT_CHECK([ovsdb_start_idltest]) m4_if([$2], [], [], [AT_CHECK([ovsdb-client transact unix:socket $2], [0], [ignore], [ignore])]) - AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 -c idl unix:socket $3], + AT_CHECK([test-ovsdb '-vPATTERN:console:test-ovsdb|%c|%m' -vjsonrpc -t10 --change-track=noalert idl unix:socket $3], [0], [stdout], [ignore]) AT_CHECK([sort stdout | uuidfilt]m4_if([$6],,, [[| $6]]), [0], [$4]) @@ -1167,7 +1180,8 @@ m4_define([OVSDB_CHECK_IDL_TRACK_C], AT_CLEANUP]) m4_define([OVSDB_CHECK_IDL_TRACK], - [OVSDB_CHECK_IDL_TRACK_C($@)]) + [OVSDB_CHECK_IDL_TRACK_C($@) + OVSDB_CHECK_IDL_TRACK_NOALERT_C($@)]) OVSDB_CHECK_IDL_TRACK([track, simple idl, initially populated], [['["idltest", diff --git a/tests/test-ovsdb.c b/tests/test-ovsdb.c index ca4e87b8115c..a5de7919d4c6 100644 --- a/tests/test-ovsdb.c +++ b/tests/test-ovsdb.c @@ -56,6 +56,7 @@ VLOG_DEFINE_THIS_MODULE(test_ovsdb); struct test_ovsdb_pvt_context { + bool track_noalert; bool track; }; @@ -122,6 +123,15 @@ parse_options(int argc, char *argv[], struct test_ovsdb_pvt_context *pvt) break; case 'c': + if (optarg) { + if (!strcmp(optarg, "noalert")) { + pvt->track_noalert = true; + } else if (!strcmp(optarg, "alert")) { + pvt->track_noalert = false; + } else { + ovs_fatal(0, "value %s is invalid", optarg); + } + } pvt->track = true; break; @@ -2610,6 +2620,7 @@ update_conditions(struct ovsdb_idl *idl, char *commands) static void do_idl(struct ovs_cmdl_context *ctx) { + struct test_ovsdb_pvt_context *pvt = ctx->pvt; struct jsonrpc *rpc; struct ovsdb_idl *idl; unsigned int seqno = 0; @@ -2618,9 +2629,6 @@ do_idl(struct ovs_cmdl_context *ctx) int step = 0; int error; int i; - bool track; - - track = ((struct test_ovsdb_pvt_context *)(ctx->pvt))->track; idl = ovsdb_idl_create(ctx->argv[1], &idltest_idl_class, true, true); ovsdb_idl_set_leader_only(idl, false); @@ -2637,7 +2645,10 @@ do_idl(struct ovs_cmdl_context *ctx) rpc = NULL; } - if (track) { + if (pvt->track_noalert) { + ovsdb_idl_track_changes_noalert(idl); + } + if (pvt->track) { ovsdb_idl_track_add_all(idl); } @@ -2683,7 +2694,7 @@ do_idl(struct ovs_cmdl_context *ctx) } /* Print update. */ - if (track) { + if (pvt->track) { print_idl_track(idl, step++, terse); ovsdb_idl_track_clear(idl); } else { -- 2.27.0 _______________________________________________ dev mailing list d...@openvswitch.org https://mail.openvswitch.org/mailman/listinfo/ovs-dev