diff --git a/NEWS b/NEWS index 748ae30eb2..31593dabc0 100644 --- a/NEWS +++ b/NEWS @@ -2,6 +2,12 @@ Post v26.03.0 ------------- - Added ability to set any "ipsec_*" NB_Global option to configure the IPsec backend. + - Added nb_cfg_timestamp to SB_Global, written atomically with each + nb_cfg update by ovn-northd. ovn-controller propagates this value + to the local OVS bridge external_ids as "ovn-nb-cfg-sb-ts". In + large deployments where options:enable_chassis_nb_cfg_update is + false, this enables per-chassis propagation latency tracking without + any southbound database writes. - Documented missing ovn-nbctl commands: "mirror-rule-add", "mirror-rule-del", "lr-nat-update-ext-ip", "ha-chassis-group-set-chassis-prio", "lsp-add-router-port", diff --git a/controller/ovn-controller.8.xml b/controller/ovn-controller.8.xml index 57e7cf5dd2..ec713ce4f7 100644 --- a/controller/ovn-controller.8.xml +++ b/controller/ovn-controller.8.xml @@ -658,6 +658,28 @@
+external-ids:ovn-nb-cfg-sb-ts in the Bridge
+ table
+
+ This key records the value of
+ OVN_Southbound.SB_Global.nb_cfg_timestamp that
+ corresponded to the ovn-nb-cfg generation for which all
+ flows have been successfully installed in OVS. It is the wall-clock
+ time (in milliseconds since the Unix epoch) at which
+ ovn-northd wrote that nb_cfg value to the
+ Southbound database. The difference between this timestamp and
+ ovn-nb-cfg-ts gives the per-chassis end-to-end
+ propagation latency for that configuration generation. The key is
+ absent when no timestamp is available (e.g. when
+ NB_Global.options:enable_chassis_nb_cfg_update is
+ false).
+
external_ids:ovn-installed and
external_ids:ovn-installed-ts in the
diff --git a/controller/ovn-controller.c b/controller/ovn-controller.c
index fd848c54c6..405c985d47 100644
--- a/controller/ovn-controller.c
+++ b/controller/ovn-controller.c
@@ -141,6 +141,7 @@ static unixctl_cb_func debug_delay_nb_cfg_report;
#define OVS_NB_CFG_NAME "ovn-nb-cfg"
#define OVS_NB_CFG_TS_NAME "ovn-nb-cfg-ts"
+#define OVS_NB_CFG_SB_TS_NAME "ovn-nb-cfg-sb-ts"
#define OVS_STARTUP_TS_NAME "ovn-startup-ts"
struct br_int_remote {
@@ -825,28 +826,62 @@ struct ed_type_ct_zones {
};
-static uint64_t
+/* Snapshot of SB_Global.nb_cfg and its paired nb_cfg_timestamp, always read
+ * from the same IDL transaction so the two values are consistent. */
+struct nb_cfg_snap {
+ uint64_t nb_cfg;
+ uint64_t ts;
+};
+
+/* Returns a snapshot of the current SB_Global nb_cfg and nb_cfg_timestamp.
+ * The pair is always read from the same SB_Global snapshot so callers can
+ * rely on (nb_cfg, ts) being consistent.
+ *
+ * 'ts' is the wall-clock time northd wrote nb_cfg to SB. The delta between
+ * that and the local completion time is the per-chassis end-to-end
+ * propagation latency (northd compile + SB write + relay fan-out +
+ * ovn-controller engine + ofctrl barrier ack).
+ *
+ * If a monitor condition change is in flight the cached snapshot from the
+ * previous call is returned, because updates received between the request
+ * and the cond ack could be from before the SB_Global value we're trying
+ * to read.
+ */
+static struct nb_cfg_snap
get_nb_cfg(const struct sbrec_sb_global_table *sb_global_table,
unsigned int cond_seqno, unsigned int expected_cond_seqno)
{
- static uint64_t nb_cfg = 0;
+ static struct nb_cfg_snap snap = {0, 0};
- /* Delay getting nb_cfg if there are monitor condition changes
- * in flight. It might be that those changes would instruct the
- * server to send updates that happened before SB_Global.nb_cfg.
- */
- if (cond_seqno != expected_cond_seqno) {
- return nb_cfg;
+ if (cond_seqno == expected_cond_seqno) {
+ const struct sbrec_sb_global *sb
+ = sbrec_sb_global_table_first(sb_global_table);
+ snap.nb_cfg = sb ? sb->nb_cfg : 0;
+ snap.ts = sb ? (uint64_t) sb->nb_cfg_timestamp : 0;
}
- const struct sbrec_sb_global *sb
- = sbrec_sb_global_table_first(sb_global_table);
- nb_cfg = sb ? sb->nb_cfg : 0;
- return nb_cfg;
+ return snap;
}
/* Propagates the local cfg seqno, 'cur_cfg', to the chassis_private record
* and to the local OVS DB.
+ *
+ * The br-int external_ids triplet (ovn-nb-cfg, ovn-nb-cfg-ts,
+ * ovn-nb-cfg-sb-ts) is stamped unconditionally, independent of
+ * 'enable_ch_nb_cfg_update'. The SB chassis_private writeback remains
+ * gated by 'enable_ch_nb_cfg_update' for deployments that want to suppress
+ * the per-bump SB write load. An external exporter watching br-int can
+ * compute the per-chassis propagation delta as
+ * (ovn-nb-cfg-ts - ovn-nb-cfg-sb-ts)
+ * regardless of the writeback setting.
+ *
+ * 'ovn-nb-cfg-sb-ts' is the SB_Global.nb_cfg_timestamp that was paired
+ * with this cur_cfg at the moment the barrier was queued (snapshotted via
+ * ofctrl-seqno's req_ts). This avoids the pitfall of pairing the just-
+ * acked cur_cfg with whatever SB_Global timestamp happens to be current
+ * now -- on a fast-churning fleet SB_Global may already have advanced
+ * past cur_cfg by the time the barrier acks, which would under-report
+ * the delta.
*/
static void
store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn,
@@ -858,6 +893,7 @@ store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn,
struct ofctrl_acked_seqnos *acked_nb_cfg_seqnos =
ofctrl_acked_seqnos_get(ofctrl_seq_type_nb_cfg);
uint64_t cur_cfg = acked_nb_cfg_seqnos->last_acked;
+ uint64_t cur_cfg_sb_ts = acked_nb_cfg_seqnos->last_acked_req_ts;
int64_t startup_ts = daemon_startup_ts();
if (ovs_txn && br_int
@@ -894,6 +930,16 @@ store_nb_cfg(struct ovsdb_idl_txn *sb_txn, struct ovsdb_idl_txn *ovs_txn,
cur_cfg_str);
ovsrec_bridge_update_external_ids_setkey(br_int, OVS_NB_CFG_TS_NAME,
cur_cfg_ts_str);
+ if (cur_cfg_sb_ts) {
+ char *sb_ts_str = xasprintf("%"PRIu64, cur_cfg_sb_ts);
+ ovsrec_bridge_update_external_ids_setkey(br_int,
+ OVS_NB_CFG_SB_TS_NAME,
+ sb_ts_str);
+ free(sb_ts_str);
+ } else {
+ ovsrec_bridge_update_external_ids_delkey(br_int,
+ OVS_NB_CFG_SB_TS_NAME);
+ }
free(cur_cfg_ts_str);
free(cur_cfg_str);
}
@@ -8199,12 +8245,18 @@ main(int argc, char *argv[])
chassis, mac_cache_data);
}
- ofctrl_seqno_update_create(
- ofctrl_seq_type_nb_cfg,
- get_nb_cfg(sbrec_sb_global_table_get(
- ovnsb_idl_loop.idl),
- ovnsb_cond_seqno,
- ovnsb_expected_cond_seqno));
+ /* Snapshot (nb_cfg, sb_ts) atomically from SB_Global
+ * and pair them through the barrier ack so the
+ * eventual completion can be attributed to the
+ * timestamp that corresponded to this exact nb_cfg
+ * generation -- not whatever SB_Global value has
+ * moved on to by the time the barrier acks. */
+ struct nb_cfg_snap snap = get_nb_cfg(
+ sbrec_sb_global_table_get(ovnsb_idl_loop.idl),
+ ovnsb_cond_seqno, ovnsb_expected_cond_seqno);
+ ofctrl_stamped_seqno_update_create(ofctrl_seq_type_nb_cfg,
+ snap.nb_cfg,
+ snap.ts);
struct local_binding_data *binding_data =
runtime_data ? &runtime_data->lbinding_data : NULL;
diff --git a/lib/ofctrl-seqno.c b/lib/ofctrl-seqno.c
index 83c17c0e52..48ed158590 100644
--- a/lib/ofctrl-seqno.c
+++ b/lib/ofctrl-seqno.c
@@ -36,6 +36,11 @@ struct ofctrl_seqno_update {
* application.
*/
uint64_t req_cfg; /* Application specific seqno. */
+ uint64_t req_ts; /* Opaque per-request timestamp captured by
+ * the caller at the moment the update was
+ * queued. Carried through to the acked
+ * state so consumers can pair the acked
+ * seqno with the input that produced it. */
};
/* List of in flight sequence number updates. */
@@ -51,6 +56,9 @@ struct ofctrl_seqno_state {
* application consumed acked requests.
*/
uint64_t cur_cfg; /* Last acked application seqno. */
+ uint64_t cur_cfg_req_ts; /* req_ts that was paired with cur_cfg when
+ * the update was queued. 0 if the caller
+ * didn't supply a timestamp. */
uint64_t req_cfg; /* Last requested application seqno. */
};
@@ -73,6 +81,7 @@ ofctrl_acked_seqnos_get(size_t seqno_type)
struct ofctrl_acked_seqnos *acked_seqnos = xmalloc(sizeof *acked_seqnos);
acked_seqnos->acked = vector_clone(&state->acked_cfgs);
acked_seqnos->last_acked = state->cur_cfg;
+ acked_seqnos->last_acked_req_ts = state->cur_cfg_req_ts;
vector_clear(&state->acked_cfgs);
if (vector_capacity(&state->acked_cfgs) >= VECTOR_THRESHOLD) {
@@ -140,6 +149,7 @@ ofctrl_seqno_add_type(void)
struct ofctrl_seqno_state state = (struct ofctrl_seqno_state) {
.acked_cfgs = VECTOR_EMPTY_INITIALIZER(uint64_t),
.cur_cfg = 0,
+ .cur_cfg_req_ts = 0,
.req_cfg = 0,
};
vector_push(&ofctrl_seqno_states, &state);
@@ -152,6 +162,19 @@ ofctrl_seqno_add_type(void)
*/
void
ofctrl_seqno_update_create(size_t seqno_type, uint64_t new_cfg)
+{
+ ofctrl_stamped_seqno_update_create(seqno_type, new_cfg, 0);
+}
+
+/* Like ofctrl_seqno_update_create() but also records 'req_ts', an opaque
+ * timestamp captured by the caller (e.g. SB_Global nb_cfg_timestamp at the
+ * moment 'new_cfg' was read). The timestamp is carried unchanged to
+ * ofctrl_acked_seqnos_get() so consumers can pair the eventual ack with the
+ * input state that produced it.
+ */
+void
+ofctrl_stamped_seqno_update_create(size_t seqno_type, uint64_t new_cfg,
+ uint64_t req_ts)
{
struct ofctrl_seqno_state *state = ofctrl_seqno_state_get(seqno_type);
@@ -169,6 +192,7 @@ ofctrl_seqno_update_create(size_t seqno_type, uint64_t new_cfg)
.seqno_type = seqno_type,
.flow_cfg = ofctrl_req_seqno,
.req_cfg = new_cfg,
+ .req_ts = req_ts,
};
vector_push(&ofctrl_seqno_updates, &update);
}
@@ -190,6 +214,7 @@ ofctrl_seqno_run(uint64_t flow_cfg)
struct ofctrl_seqno_state *state =
ofctrl_seqno_state_get(update->seqno_type);
state->cur_cfg = update->req_cfg;
+ state->cur_cfg_req_ts = update->req_ts;
vector_push(&state->acked_cfgs, &update->req_cfg);
index++;
diff --git a/lib/ofctrl-seqno.h b/lib/ofctrl-seqno.h
index faa97cc535..06db4e67c3 100644
--- a/lib/ofctrl-seqno.h
+++ b/lib/ofctrl-seqno.h
@@ -23,10 +23,18 @@
/* Collection of acked ofctrl_seqno_update requests and the most recent
* 'last_acked' value.
+ *
+ * 'last_acked_req_ts' carries the opaque timestamp that was associated with
+ * 'last_acked' at the time the seqno was requested. Consumers that don't
+ * pass a timestamp at create time will see 0 here. The timestamp is
+ * preserved across the barrier so that applications can pair the acked
+ * config seqno with the input state that produced it (e.g. SB_Global
+ * nb_cfg_timestamp at the moment we asked OVS to barrier).
*/
struct ofctrl_acked_seqnos {
struct vector acked;
uint64_t last_acked;
+ uint64_t last_acked_req_ts;
};
struct ofctrl_acked_seqnos *ofctrl_acked_seqnos_get(size_t seqno_type);
@@ -36,6 +44,8 @@ bool ofctrl_acked_seqnos_contains(const struct ofctrl_acked_seqnos *seqnos,
size_t ofctrl_seqno_add_type(void);
void ofctrl_seqno_update_create(size_t seqno_type, uint64_t new_cfg);
+void ofctrl_stamped_seqno_update_create(size_t seqno_type, uint64_t new_cfg,
+ uint64_t req_ts);
void ofctrl_seqno_run(uint64_t flow_cfg);
uint64_t ofctrl_seqno_get_req_cfg(void);
void ofctrl_seqno_flush(void);
diff --git a/northd/ovn-northd.c b/northd/ovn-northd.c
index 7a64a6ef27..ffd3d00fb4 100644
--- a/northd/ovn-northd.c
+++ b/northd/ovn-northd.c
@@ -541,6 +541,7 @@ update_sequence_numbers(int64_t loop_start_time,
* Also set up to update sb_cfg once our southbound transaction commits. */
if (nb->nb_cfg != sb->nb_cfg) {
sbrec_sb_global_set_nb_cfg(sb, nb->nb_cfg);
+ sbrec_sb_global_set_nb_cfg_timestamp(sb, loop_start_time);
nbrec_nb_global_set_nb_cfg_timestamp(nb, loop_start_time);
}
sb_loop->next_cfg = nb->nb_cfg;
@@ -943,6 +944,8 @@ main(int argc, char *argv[])
/* Disable alerting for pure write-only columns. */
ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_sb_global_col_nb_cfg);
+ ovsdb_idl_omit_alert(ovnsb_idl_loop.idl,
+ &sbrec_sb_global_col_nb_cfg_timestamp);
ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_address_set_col_name);
ovsdb_idl_omit_alert(ovnsb_idl_loop.idl, &sbrec_address_set_col_addresses);
for (size_t i = 0; i < SBREC_LOGICAL_FLOW_N_COLUMNS; i++) {
diff --git a/ovn-sb.ovsschema b/ovn-sb.ovsschema
index d9a91739cc..f3766f01c5 100644
--- a/ovn-sb.ovsschema
+++ b/ovn-sb.ovsschema
@@ -1,11 +1,12 @@
{
"name": "OVN_Southbound",
- "version": "21.8.0",
- "cksum": "614397313 36713",
+ "version": "21.9.0",
+ "cksum": "2351636143 36779",
"tables": {
"SB_Global": {
"columns": {
"nb_cfg": {"type": {"key": "integer"}},
+ "nb_cfg_timestamp": {"type": {"key": "integer"}},
"external_ids": {
"type": {"key": "string", "value": "string",
"min": 0, "max": "unlimited"}},
diff --git a/ovn-sb.xml b/ovn-sb.xml
index e45b63d73f..c1883db2e3 100644
--- a/ovn-sb.xml
+++ b/ovn-sb.xml
@@ -156,6 +156,15 @@
the southbound database to bring it up to date with these changes, it
updates this column to the same value.
+
+ ovn-northd last wrote
+ to the southbound database, in milliseconds
+ since the Unix epoch. Set atomically with each update to
+ . Hypervisors read this value to measure
+ end-to-end propagation latency from northbound commit to local
+ datapath programming completion.
+