From ddc2ae2939b836ea5650308826d037a31ab38eb6 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sat, 21 Mar 2026 20:25:13 +0100 Subject: [PATCH 01/19] l4: add trace with L4 ports Add trace output showing source and destination ports for UDP and TCP packets in l4_input_local. Also parse TCP headers to extract ports before falling through to management plane forwarding. Signed-off-by: Robin Jarry --- modules/l4/l4_input_local.c | 43 +++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/modules/l4/l4_input_local.c b/modules/l4/l4_input_local.c index 702bca16d..d9ebbb980 100644 --- a/modules/l4/l4_input_local.c +++ b/modules/l4/l4_input_local.c @@ -7,6 +7,7 @@ #include "l4.h" #include "log.h" +#include #include LOG_TYPE("graph"); @@ -62,13 +63,27 @@ int l4_input_unalias_port(uint8_t proto, rte_be16_t alias) { return 0; } +struct l4_trace_data { + rte_be16_t sport; + rte_be16_t dport; +}; + +static int trace_l4_format(char *buf, size_t len, const void *data, size_t /*data_len*/) { + const struct l4_trace_data *d = data; + return snprintf( + buf, len, "src=%u dst=%u", rte_be_to_cpu_16(d->sport), rte_be_to_cpu_16(d->dport) + ); +} + static uint16_t l4_input_local_process( struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs ) { - struct rte_udp_hdr *hdr; + struct rte_tcp_hdr *tcp; + struct rte_udp_hdr *udp; + rte_be16_t sport, dport; struct rte_mbuf *mbuf; rte_edge_t edge; uint8_t proto; @@ -76,6 +91,7 @@ static uint16_t l4_input_local_process( for (uint16_t i = 0; i < nb_objs; i++) { mbuf = objs[i]; edge = BAD_PROTO; + sport = dport = 0; if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) proto = ip_local_mbuf_data(mbuf)->proto; @@ -84,14 +100,28 @@ static uint16_t l4_input_local_process( else goto next; - if (proto != IPPROTO_UDP) { + switch (proto) { + case IPPROTO_UDP: + udp = rte_pktmbuf_mtod(mbuf, struct rte_udp_hdr *); + sport = udp->src_port; + dport = udp->dst_port; + edge = udp_edges[udp->dst_port]; + 
break; + case IPPROTO_TCP: + tcp = rte_pktmbuf_mtod(mbuf, struct rte_tcp_hdr *); + sport = tcp->src_port; + dport = tcp->dst_port; + // fallthrough + default: edge = MANAGEMENT; - goto next; + break; } - - hdr = rte_pktmbuf_mtod(mbuf, struct rte_udp_hdr *); - edge = udp_edges[hdr->dst_port]; next: + if (gr_mbuf_is_traced(mbuf)) { + struct l4_trace_data *t = gr_mbuf_trace_add(mbuf, node, sizeof(*t)); + t->sport = sport; + t->dport = dport; + } rte_node_enqueue_x1(graph, node, edge, mbuf); } return nb_objs; @@ -117,6 +147,7 @@ static struct gr_node_info info = { .node = &input_node, .type = GR_NODE_T_L4, .register_callback = l4_input_local_register, + .trace_format = trace_l4_format, }; GR_NODE_REGISTER(info); From 7db4bfea75378d5a11b00dc0a51768fdb002b52a Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:17:01 +0100 Subject: [PATCH 02/19] vrf: add a random mac address on creation VRF interfaces now get a random MAC address at creation time. This MAC will be used as the Router MAC for EVPN L3VNI type-5 routes. 
Signed-off-by: Robin Jarry --- frr/if_grout.c | 3 +++ modules/infra/api/gr_infra.h | 2 ++ modules/infra/cli/vrf.c | 9 ++++++++- modules/infra/control/vrf.c | 18 ++++++++++++++++++ modules/ip6/control/address.c | 3 +++ 5 files changed, 34 insertions(+), 1 deletion(-) diff --git a/frr/if_grout.c b/frr/if_grout.c index 12c0b3bff..576c8d3c3 100644 --- a/frr/if_grout.c +++ b/frr/if_grout.c @@ -48,6 +48,7 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { const struct gr_iface_info_vlan *gr_vlan = NULL; const struct gr_iface_info_port *gr_port = NULL; const struct gr_iface_info_bond *gr_bond = NULL; + const struct gr_iface_info_vrf *gr_vrf = NULL; ifindex_t bridge_ifindex = IFINDEX_INTERNAL; ifindex_t link_ifindex = IFINDEX_INTERNAL; ifindex_t bond_ifindex = IFINDEX_INTERNAL; @@ -83,6 +84,8 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { link_type = ZEBRA_LLT_IPIP; break; case GR_IFACE_TYPE_VRF: + gr_vrf = (const struct gr_iface_info_vrf *)&gr_if->info; + mac = &gr_vrf->mac; link_type = ZEBRA_LLT_ETHER; zif_type = ZEBRA_IF_VRF; break; diff --git a/modules/infra/api/gr_infra.h b/modules/infra/api/gr_infra.h index 9983ff463..b249597fd 100644 --- a/modules/infra/api/gr_infra.h +++ b/modules/infra/api/gr_infra.h @@ -127,6 +127,7 @@ struct gr_iface_info_port { // VRF reconfiguration attribute flags. #define GR_VRF_SET_FIB GR_BIT64(32) +#define GR_VRF_SET_MAC GR_BIT64(33) // Per-AF FIB configuration. struct gr_iface_info_vrf_fib { @@ -138,6 +139,7 @@ struct gr_iface_info_vrf_fib { struct gr_iface_info_vrf { struct gr_iface_info_vrf_fib ipv4; struct gr_iface_info_vrf_fib ipv6; + struct rte_ether_addr mac; // Used as Router MAC for EVPN L3VNI. }; // VLAN reconfiguration attribute flags. 
diff --git a/modules/infra/cli/vrf.c b/modules/infra/cli/vrf.c index ea7cf0876..a3a854016 100644 --- a/modules/infra/cli/vrf.c +++ b/modules/infra/cli/vrf.c @@ -10,17 +10,19 @@ #define VRF_ATTRS_CMD \ "(rib4-routes RIB4_ROUTES),(fib4-tbl8 FIB4_TBL8)" \ ",(rib6-routes RIB6_ROUTES),(fib6-tbl8 FIB6_TBL8)" \ - ",(description DESCR)" + ",(mac MAC),(description DESCR)" #define VRF_ATTRS_ARGS \ with_help("Max IPv4 routes.", ec_node_uint("RIB4_ROUTES", 1, UINT32_MAX, 10)), \ with_help("IPv4 TBL8 groups.", ec_node_uint("FIB4_TBL8", 1, UINT32_MAX, 10)), \ with_help("Max IPv6 routes.", ec_node_uint("RIB6_ROUTES", 1, UINT32_MAX, 10)), \ with_help("IPv6 TBL8 groups.", ec_node_uint("FIB6_TBL8", 1, UINT32_MAX, 10)), \ + with_help("Set the ethernet address.", ec_node_re("MAC", ETH_ADDR_RE)), \ with_help("Interface description.", ec_node("any", "DESCR")) static void vrf_show(struct gr_api_client *, const struct gr_iface *iface, struct gr_object *o) { const struct gr_iface_info_vrf *info = PAYLOAD(iface); + gr_object_field(o, "mac", 0, ETH_F, &info->mac); gr_object_field(o, "rib4_max_routes", GR_DISP_INT, "%u", info->ipv4.max_routes); gr_object_field(o, "fib4_num_tbl8", GR_DISP_INT, "%u", info->ipv4.num_tbl8); gr_object_field(o, "rib6_max_routes", GR_DISP_INT, "%u", info->ipv6.max_routes); @@ -80,6 +82,11 @@ static uint64_t parse_vrf_args( set_attrs |= GR_VRF_SET_FIB; } + if (arg_eth_addr(p, "MAC", &info->mac) == 0) + set_attrs |= GR_VRF_SET_MAC; + else if (errno != ENOENT) + return 0; + return set_attrs; } diff --git a/modules/infra/control/vrf.c b/modules/infra/control/vrf.c index 79a7cc665..df5522f61 100644 --- a/modules/infra/control/vrf.c +++ b/modules/infra/control/vrf.c @@ -151,6 +151,7 @@ static int netlink_vrf_add(const struct iface *iface) { strerror(errno)); return ret; } + netlink_link_set_mac(vrf->vrf_ifindex, &vrf->mac); } ret = netlink_add_route(iface->cp_id, table_id); @@ -229,6 +230,7 @@ static int iface_vrf_init(struct iface *iface, const void *api_info) { // 
VRF's vrf_id is its own iface_id (VRF identifier) iface->vrf_id = iface->id; vrf->ref_count = 0; + rte_eth_random_addr(vrf->mac.addr_bytes); if (iface_loopback_create(iface) < 0) return -errno; @@ -347,10 +349,24 @@ static int iface_vrf_reconfig( fib_conf->num_tbl8); } } + if (set_attrs & GR_VRF_SET_MAC && iface_set_eth_addr(iface, &info->mac) < 0) + return -errno; return 0; } +static int iface_vrf_get_eth_addr(const struct iface *iface, struct rte_ether_addr *mac) { + const struct iface_info_vrf *vrf = iface_info_vrf(iface); + *mac = vrf->mac; + return 0; +} + +static int iface_vrf_set_eth_addr(struct iface *iface, const struct rte_ether_addr *mac) { + struct iface_info_vrf *vrf = iface_info_vrf(iface); + vrf->mac = *mac; + return 0; +} + static void iface_vrf_to_api(void *info, const struct iface *iface) { const struct iface_info_vrf *vrf = iface_info_vrf(iface); struct gr_iface_info_vrf *api = info; @@ -364,6 +380,8 @@ static struct iface_type iface_type_vrf = { .priv_size = sizeof(struct iface_info_vrf), .init = iface_vrf_init, .reconfig = iface_vrf_reconfig, + .set_eth_addr = iface_vrf_set_eth_addr, + .get_eth_addr = iface_vrf_get_eth_addr, .fini = iface_vrf_fini, .to_api = iface_vrf_to_api, }; diff --git a/modules/ip6/control/address.c b/modules/ip6/control/address.c index dd09d1659..e8dc55ab6 100644 --- a/modules/ip6/control/address.c +++ b/modules/ip6/control/address.c @@ -420,6 +420,9 @@ static void ip6_iface_llocal_init(const struct iface *iface) { struct rte_ether_addr mac; unsigned i; + if (iface->type == GR_IFACE_TYPE_VRF) + return; // VRF interfaces shouldn't have a link local address + if (iface_get_eth_addr(iface, &mac) < 0) return; From e71565117206112eedb9386f07be6c06628f153e Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:17:01 +0100 Subject: [PATCH 03/19] vxlan: inherit VRF MAC for L3VNI router MAC VXLAN interfaces now inherit from their VTEP VRF (encap_vrf_id) MAC address instead of a random one. 
Some EVPN endpoints make the assumption that there is a unique RMAC per VTEP. When a VXLAN interface is in VRF mode (or moves to one), also synchronize that MAC address to the VRF interface. This ensures the VXLAN interface's MAC matches the Router MAC that FRR advertises in EVPN type-5 routes, so incoming L3VPN packets pass eth_input's check. Signed-off-by: Robin Jarry --- modules/l2/control/vxlan.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/modules/l2/control/vxlan.c b/modules/l2/control/vxlan.c index 2d8d1c4db..ec44ce8a6 100644 --- a/modules/l2/control/vxlan.c +++ b/modules/l2/control/vxlan.c @@ -44,7 +44,7 @@ struct iface *vxlan_get_iface(rte_be32_t vni, uint16_t encap_vrf_id) { static int iface_vxlan_reconfig( struct iface *iface, uint64_t set_attrs, - const struct gr_iface *, + const struct gr_iface *conf, const void *api_info ) { struct iface_info_vxlan *cur = iface_info_vxlan(iface); @@ -136,10 +136,31 @@ static int iface_vxlan_reconfig( conf_done |= GR_VXLAN_SET_LOCAL; } - if (set_attrs & GR_VXLAN_SET_MAC) { - if (iface_set_eth_addr(iface, &next->mac) < 0) + if (set_attrs & (GR_IFACE_SET_VRF | GR_VXLAN_SET_ENCAP_VRF | GR_VXLAN_SET_MAC)) { + struct iface *vrf = get_vrf_iface(cur->encap_vrf_id); + struct rte_ether_addr mac = next->mac; + + assert(vrf != NULL); + + // Some devices assume a unique RMAC per VTEP. + // When no explicit MAC is given, inherit the VTEP VRF's MAC. + if (rte_is_zero_ether_addr(&mac)) + mac = iface_info_vrf(vrf)->mac; + + if (iface_set_eth_addr(iface, &mac) < 0) goto err; + conf_done |= GR_VXLAN_SET_MAC; + + // If configured for EVPN L3VNI, also synchronize the MAC on the interface VRF. + // So it will be advertised as RMAC by FRR. 
+ vrf = NULL; + if (set_attrs & GR_IFACE_SET_VRF) + vrf = get_vrf_iface(conf->vrf_id); + else if (iface->mode == GR_IFACE_MODE_VRF) + vrf = get_vrf_iface(iface->vrf_id); + if (vrf != NULL && iface_set_eth_addr(vrf, &mac) < 0) + goto err; } // Update the datapath template from the current config. From 2519542e4cdba677f3126a30505add4aa4a0541f Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:18:52 +0100 Subject: [PATCH 04/19] ip,ip6: save VTEP address across mbuf private data All GR_MBUF_PRIV_DATA_TYPE types overlay the same memory region. When ip_output resolves a nexthop on a VXLAN interface, it writes eth_output_mbuf_data->dst and ->ether_type which clobber iface_mbuf_data->vtep. By the time vxlan_output reads the vtep field, it contains garbage. Add a vtep field to eth_output_mbuf_data and set it from the nexthop gateway IP when the output interface is a VXLAN. In eth_output, save the vtep value before writing iface_mbuf_data (which shares the same memory) and restore it after. 
Signed-off-by: Robin Jarry --- modules/infra/datapath/eth.h | 3 +++ modules/infra/datapath/eth_output.c | 2 ++ modules/ip/datapath/ip_output.c | 1 + modules/ip6/datapath/ip6_output.c | 1 + 4 files changed, 7 insertions(+) diff --git a/modules/infra/datapath/eth.h b/modules/infra/datapath/eth.h index 73da053bf..e28e124e5 100644 --- a/modules/infra/datapath/eth.h +++ b/modules/infra/datapath/eth.h @@ -5,6 +5,8 @@ #include "mbuf.h" +#include + #include #include @@ -22,6 +24,7 @@ GR_MBUF_PRIV_DATA_TYPE(eth_input_mbuf_data, { eth_domain_t domain; }) GR_MBUF_PRIV_DATA_TYPE(eth_output_mbuf_data, { struct rte_ether_addr dst; rte_be16_t ether_type; + ip4_addr_t vtep; }); void gr_eth_input_add_type(rte_be16_t eth_type, const char *node_name); diff --git a/modules/infra/datapath/eth_output.c b/modules/infra/datapath/eth_output.c index f85b0475c..04b079887 100644 --- a/modules/infra/datapath/eth_output.c +++ b/modules/infra/datapath/eth_output.c @@ -59,7 +59,9 @@ eth_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, t->src_addr = src_mac; t->ether_type = priv->ether_type; } + ip4_addr_t vtep = priv->vtep; iface_mbuf_data(mbuf)->vlan_id = 0; + iface_mbuf_data(mbuf)->vtep = vtep; rte_node_enqueue_x1(graph, node, edge, mbuf); } diff --git a/modules/ip/datapath/ip_output.c b/modules/ip/datapath/ip_output.c index 61a0dc14a..d4e158b5f 100644 --- a/modules/ip/datapath/ip_output.c +++ b/modules/ip/datapath/ip_output.c @@ -134,6 +134,7 @@ ip_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, u eth_data = eth_output_mbuf_data(mbuf); eth_data->dst = l3->mac; eth_data->ether_type = RTE_BE16(RTE_ETHER_TYPE_IPV4); + eth_data->vtep = (iface->type == GR_IFACE_TYPE_VXLAN) ? 
l3->ipv4 : 0; sent++; next: if (gr_mbuf_is_traced(mbuf)) { diff --git a/modules/ip6/datapath/ip6_output.c b/modules/ip6/datapath/ip6_output.c index 9eb89aa6c..ce7d723f6 100644 --- a/modules/ip6/datapath/ip6_output.c +++ b/modules/ip6/datapath/ip6_output.c @@ -116,6 +116,7 @@ ip6_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, else eth_data->dst = l3->mac; eth_data->ether_type = RTE_BE16(RTE_ETHER_TYPE_IPV6); + eth_data->vtep = (iface->type == GR_IFACE_TYPE_VXLAN) ? l3->ipv4 : 0; sent++; next: if (gr_mbuf_is_traced(mbuf)) { From 453f07d9f35e2b61492cc05a89df3edbdeb660d8 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:18:23 +0100 Subject: [PATCH 05/19] nexthop: add GR_NH_F_REMOTE flag Add a flag to mark nexthops learned from remote VTEPs via EVPN. These nexthops carry a known IP+MAC pair from the control plane and are set to GR_NH_S_REACHABLE with GR_NH_F_STATIC so they bypass ARP/ND probing and aging. Signed-off-by: Robin Jarry --- modules/infra/api/gr_nexthop.h | 3 +++ modules/infra/cli/nexthop.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/infra/api/gr_nexthop.h b/modules/infra/api/gr_nexthop.h index 9ac0085cb..bdf2b8e54 100644 --- a/modules/infra/api/gr_nexthop.h +++ b/modules/infra/api/gr_nexthop.h @@ -25,6 +25,7 @@ typedef enum : uint8_t { GR_NH_F_GATEWAY = GR_BIT8(2), // Gateway route. GR_NH_F_LINK = GR_BIT8(3), // Connected link route. GR_NH_F_MCAST = GR_BIT8(4), // Multicast address. + GR_NH_F_REMOTE = GR_BIT8(5), // Remote VTEP nexthop (EVPN). } gr_nh_flags_t; // Nexthop types for different forwarding behaviors. 
@@ -163,6 +164,8 @@ static inline const char *gr_nh_flag_name(const gr_nh_flags_t flag) { return "link"; case GR_NH_F_MCAST: return "multicast"; + case GR_NH_F_REMOTE: + return "remote"; } return "?"; } diff --git a/modules/infra/cli/nexthop.c b/modules/infra/cli/nexthop.c index 3845c4652..0e5625eb5 100644 --- a/modules/infra/cli/nexthop.c +++ b/modules/infra/cli/nexthop.c @@ -383,6 +383,8 @@ static cmd_status_t nh_l3_add(struct gr_api_client *c, const struct ec_pnode *p) goto out; if (arg_eth_addr(p, "MAC", &l3->mac) < 0 && errno != ENOENT) goto out; + if (arg_str(p, "remote")) + l3->flags |= GR_NH_F_REMOTE; if (gr_api_client_send_recv(c, GR_NH_ADD, len, req, NULL) < 0) goto out; @@ -623,13 +625,14 @@ static int ctx_init(struct ec_node *root) { ret = CLI_COMMAND( NEXTHOP_ADD_CTX(root), - "l3 iface IFACE [(id ID),(address IP),(mac MAC)]", + "l3 iface IFACE [(id ID),(address IP),(mac MAC),(remote)]", nh_l3_add, "Add a new L3 nexthop.", with_help("IPv4/6 address.", ec_node_re("IP", IP_ANY_RE)), with_help("Ethernet address.", ec_node_re("MAC", ETH_ADDR_RE)), with_help("Nexthop ID.", ec_node_uint("ID", 1, UINT32_MAX - 1, 10)), - with_help("Output interface.", ec_node_dyn("IFACE", complete_iface_names, NULL)) + with_help("Output interface.", ec_node_dyn("IFACE", complete_iface_names, NULL)), + with_help("Mark as remote (EVPN).", ec_node_str("remote", "remote")) ); if (ret < 0) return ret; From 9ba8949b2b550c0e18fbf40a2737ab2eb6da3556 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:16:11 +0100 Subject: [PATCH 06/19] frr: add L3VNI dplane-thread state EVPN type-5 routes require two pieces of state that FRR delivers out of order: the remote router MAC arrives via DPLANE_OP_NEIGH_INSTALL before the nexthop via DPLANE_OP_NH_INSTALL. Both need to be combined when grout_add_nexthop builds the GR_NH_ADD request. 
Add l3vni_map to maintain two hash tables on the dplane thread: vrf_id -> vxlan_iface_id: used to redirect nexthops from the VRF interface (FRR's SVI model) to the VXLAN interface so that ip_output routes packets into the tunnel. (vrf_id, vtep) -> RMAC: caches the remote router MAC until the matching nexthop install arrives. Both tables run exclusively on the dplane thread so no locking is needed. Signed-off-by: Robin Jarry --- frr/l3vni_map.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ frr/l3vni_map.h | 45 ++++++++++++++++++ frr/meson.build | 1 + 3 files changed, 165 insertions(+) create mode 100644 frr/l3vni_map.c create mode 100644 frr/l3vni_map.h diff --git a/frr/l3vni_map.c b/frr/l3vni_map.c new file mode 100644 index 000000000..bf427e34a --- /dev/null +++ b/frr/l3vni_map.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2026 Robin Jarry + +#include "if_map.h" +#include "l3vni_map.h" + +#include + +#include +#include + +// All functions in this file run exclusively on the dplane thread +// (grout_link_change, grout_add_nexthop, grout_neigh_update_ctx). +// No locking required. 
+ +// VRF -> VXLAN iface mapping /////////////////////////////////////////////////// + +PREDECL_HASH(l3vni_hash); + +struct l3vni_entry { + struct l3vni_hash_item item; + uint16_t vrf_id; + uint16_t vxlan_iface_id; +}; + +static int l3vni_cmp(const struct l3vni_entry *a, const struct l3vni_entry *b) { + return numcmp(a->vrf_id, b->vrf_id); +} + +static uint32_t l3vni_hashfn(const struct l3vni_entry *e) { + return e->vrf_id; +} + +DECLARE_HASH(l3vni_hash, struct l3vni_entry, item, l3vni_cmp, l3vni_hashfn); +static struct l3vni_hash_head l3vni_entries = INIT_HASH(l3vni_entries); + +void l3vni_set(uint16_t vrf_id, uint16_t vxlan_iface_id) { + struct l3vni_entry *e, key = {.vrf_id = vrf_id}; + + e = l3vni_hash_find(&l3vni_entries, &key); + if (e != NULL) { + e->vxlan_iface_id = vxlan_iface_id; + return; + } + e = XCALLOC(MTYPE_GROUT_MEM, sizeof(*e)); + e->vrf_id = vrf_id; + e->vxlan_iface_id = vxlan_iface_id; + l3vni_hash_add(&l3vni_entries, e); +} + +void l3vni_del(uint16_t vrf_id) { + struct l3vni_entry key = {.vrf_id = vrf_id}; + struct l3vni_entry *e = l3vni_hash_find(&l3vni_entries, &key); + + if (e != NULL) { + l3vni_hash_del(&l3vni_entries, e); + XFREE(MTYPE_GROUT_MEM, e); + } +} + +uint16_t l3vni_get_vxlan(uint16_t vrf_id) { + struct l3vni_entry key = {.vrf_id = vrf_id}; + struct l3vni_entry *e = l3vni_hash_find(&l3vni_entries, &key); + return e ? e->vxlan_iface_id : GR_IFACE_ID_UNDEF; +} + +// (VRF, VTEP) -> RMAC cache /////////////////////////////////////////////////// + +PREDECL_HASH(rmac_hash); + +struct rmac_entry { + struct rmac_hash_item item; + uint16_t vrf_id; + ip4_addr_t vtep; + struct ethaddr mac; +}; + +static int rmac_cmp(const struct rmac_entry *a, const struct rmac_entry *b) { + int r = numcmp(a->vrf_id, b->vrf_id); + return r ? 
r : numcmp(a->vtep, b->vtep); +} + +static uint32_t rmac_hashfn(const struct rmac_entry *e) { + return jhash_2words(e->vrf_id, e->vtep, 0); +} + +DECLARE_HASH(rmac_hash, struct rmac_entry, item, rmac_cmp, rmac_hashfn); +static struct rmac_hash_head rmac_entries = INIT_HASH(rmac_entries); + +void l3vni_rmac_set(uint16_t vrf_id, ip4_addr_t vtep, const struct ethaddr *mac) { + struct rmac_entry *e, key = {.vrf_id = vrf_id, .vtep = vtep}; + + e = rmac_hash_find(&rmac_entries, &key); + if (e != NULL) { + e->mac = *mac; + return; + } + e = XCALLOC(MTYPE_GROUT_MEM, sizeof(*e)); + e->vrf_id = vrf_id; + e->vtep = vtep; + e->mac = *mac; + rmac_hash_add(&rmac_entries, e); +} + +void l3vni_rmac_del(uint16_t vrf_id, ip4_addr_t vtep) { + struct rmac_entry key = {.vrf_id = vrf_id, .vtep = vtep}; + struct rmac_entry *e = rmac_hash_find(&rmac_entries, &key); + + if (e != NULL) { + rmac_hash_del(&rmac_entries, e); + XFREE(MTYPE_GROUT_MEM, e); + } +} + +const struct ethaddr *l3vni_rmac_get(uint16_t vrf_id, ip4_addr_t vtep) { + struct rmac_entry key = {.vrf_id = vrf_id, .vtep = vtep}; + struct rmac_entry *e = rmac_hash_find(&rmac_entries, &key); + return e ? &e->mac : NULL; +} diff --git a/frr/l3vni_map.h b/frr/l3vni_map.h new file mode 100644 index 000000000..a005dfbe5 --- /dev/null +++ b/frr/l3vni_map.h @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2026 Robin Jarry + +// L3VNI dplane-thread state for EVPN symmetric IRB (Integrated Routing and +// Bridging). +// +// FRR's EVPN type-5 (IP prefix) routes use a per-VRF L3 VNI with a VXLAN +// interface. Two mappings are maintained on the dplane thread (no locking): +// +// VRF -> VXLAN iface +// +// grout_add_nexthop() redirects nexthops from the VRF (FRR's SVI model) to +// the VXLAN interface so that ip_output routes packets into the tunnel. +// +// (VRF, VTEP) -> RMAC +// +// DPLANE_OP_NEIGH_INSTALL delivers the remote router MAC before +// DPLANE_OP_NH_INSTALL creates the nexthop. 
The RMAC is cached here and +// applied by grout_add_nexthop() when the nexthop arrives. + +#pragma once + +#include "lib/prefix.h" + +#include + +#include + +// Register vrf_id -> vxlan_iface_id mapping. +void l3vni_set(uint16_t vrf_id, uint16_t vxlan_iface_id); + +// Remove mapping for vrf_id. +void l3vni_del(uint16_t vrf_id); + +// Return vxlan iface id for vrf_id, or GR_IFACE_ID_UNDEF. +uint16_t l3vni_get_vxlan(uint16_t vrf_id); + +// Cache remote VTEP router MAC for (vrf_id, vtep). +void l3vni_rmac_set(uint16_t vrf_id, ip4_addr_t vtep, const struct ethaddr *mac); + +// Remove cached RMAC for (vrf_id, vtep). +void l3vni_rmac_del(uint16_t vrf_id, ip4_addr_t vtep); + +// Look up cached RMAC for (vrf_id, vtep), or NULL. +const struct ethaddr *l3vni_rmac_get(uint16_t vrf_id, ip4_addr_t vtep); diff --git a/frr/meson.build b/frr/meson.build index 387c25b31..ef0824139 100644 --- a/frr/meson.build +++ b/frr/meson.build @@ -23,6 +23,7 @@ frr_plugin = shared_module( files( 'if_grout.c', 'if_map.c', + 'l3vni_map.c', 'rt_grout.c', 'zebra_dplane_grout.c', ), From 81b9f3a0ddc19cfcce0f29e73fb050244c17b221 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:19:19 +0100 Subject: [PATCH 07/19] frr: add L3VPN support Handle EVPN type-5 (IP prefix) routes with symmetric IRB over VXLAN. Present VRF-mode VXLAN interfaces to FRR as bridge slaves of the VRF interface. FRR requires an SVI derived from a bridge master to bring the L3VNI up and compute the Router MAC for type-5 routes. The VRF MAC (set in a previous commit) serves as the Router MAC. Handle DPLANE_OP_NEIGH_INSTALL/DELETE to cache remote router MACs delivered by FRR before the corresponding nexthop install arrives. When grout_add_nexthop processes an L3 nexthop in a VRF with an L3VNI, it redirects the interface from VRF to VXLAN and applies the cached RMAC. 
Signed-off-by: Robin Jarry --- frr/if_grout.c | 13 +++++++++++++ frr/rt_grout.c | 42 ++++++++++++++++++++++++++++++++++++++++ frr/rt_grout.h | 1 + frr/zebra_dplane_grout.c | 5 +++++ 4 files changed, 61 insertions(+) diff --git a/frr/if_grout.c b/frr/if_grout.c index 576c8d3c3..046f74884 100644 --- a/frr/if_grout.c +++ b/frr/if_grout.c @@ -3,6 +3,7 @@ #include "if_grout.h" #include "if_map.h" +#include "l3vni_map.h" #include "log_grout.h" #include @@ -136,6 +137,16 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { dplane_ctx_set_ifp_table_id( ctx, vrf_grout_to_frr(gr_if->base.vrf_id) ); + + // For VXLAN in VRF mode, present it as a bridge slave + // of the VRF interface. FRR requires an SVI (derived + // from the bridge master) to bring the L3VNI up and + // compute the Router MAC for EVPN type-5 routes. + if (zif_type == ZEBRA_IF_VXLAN) { + bridge_ifindex = ifindex_grout_to_frr(gr_if->base.vrf_id); + slave_type = ZEBRA_IF_SLAVE_BRIDGE; + l3vni_set(gr_if->base.vrf_id, gr_if->id); + } break; case GR_IFACE_MODE_BOND: bond_ifindex = ifindex_grout_to_frr(gr_if->domain_id); @@ -182,6 +193,8 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { } else { dplane_ctx_set_op(ctx, DPLANE_OP_INTF_DELETE); dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_QUEUED); + if (gr_vxlan != NULL && gr_if->mode == GR_IFACE_MODE_VRF) + l3vni_del(gr_if->base.vrf_id); remove_mapping_by_grout_ifindex(gr_if->id); } diff --git a/frr/rt_grout.c b/frr/rt_grout.c index 382fd2a71..38e587a7e 100644 --- a/frr/rt_grout.c +++ b/frr/rt_grout.c @@ -2,6 +2,7 @@ // Copyright (c) 2025 Maxime Leroy, Free Mobile #include "if_map.h" +#include "l3vni_map.h" #include "log_grout.h" #include "rt_grout.h" @@ -622,7 +623,9 @@ grout_add_nexthop(uint32_t nh_id, gr_nh_origin_t origin, const struct nexthop *n struct gr_nexthop_info_srv6 *sr6; struct gr_nh_add_req *req = NULL; struct gr_nexthop_info_l3 *l3; + const struct ethaddr *rmac; size_t len = sizeof(*req); + uint16_t 
vxlan_iface_id; gr_nh_type_t type; switch (nh->type) { @@ -670,12 +673,25 @@ grout_add_nexthop(uint32_t nh_id, gr_nh_origin_t origin, const struct nexthop *n switch (type) { case GR_NH_T_L3: + // For L3 nexthops in VRFs with an L3VNI, redirect the iface from + // the VRF (SVI in FRR's model) to the VXLAN interface. Grout + // routes packets directly through the VXLAN tunnel. + vxlan_iface_id = l3vni_get_vxlan(req->nh.vrf_id); + if (vxlan_iface_id != GR_IFACE_ID_UNDEF) + req->nh.iface_id = vxlan_iface_id; + switch (nh->type) { case NEXTHOP_TYPE_IPV4: case NEXTHOP_TYPE_IPV4_IFINDEX: l3 = (struct gr_nexthop_info_l3 *)req->nh.info; l3->af = GR_AF_IP4; memcpy(&l3->ipv4, &nh->gate.ipv4, sizeof(l3->ipv4)); + // Apply cached RMAC from EVPN NEIGH install if available. + rmac = l3vni_rmac_get(req->nh.vrf_id, l3->ipv4); + if (rmac != NULL) { + memcpy(&l3->mac, rmac, sizeof(l3->mac)); + l3->flags |= GR_NH_F_REMOTE; + } break; case NEXTHOP_TYPE_IPV6: case NEXTHOP_TYPE_IPV6_IFINDEX: @@ -952,6 +968,32 @@ enum zebra_dplane_result grout_macfdb_update_ctx(struct zebra_dplane_ctx *ctx) { return ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE; } +enum zebra_dplane_result grout_neigh_update_ctx(struct zebra_dplane_ctx *ctx) { + const struct ipaddr *addr = dplane_ctx_neigh_get_ipaddr(ctx); + bool add = dplane_ctx_get_op(ctx) != DPLANE_OP_NEIGH_DELETE; + uint16_t vrf_id = vrf_frr_to_grout(dplane_ctx_get_vrf(ctx)); + + if (addr->ipa_type != IPADDR_V4) { + gr_log_debug("only IPv4 VTEP addresses supported, skip"); + return ZEBRA_DPLANE_REQUEST_SUCCESS; + } + + // Cache the RMAC for later use by grout_add_nexthop. We cannot + // create a separate nexthop here because grout's L3 nexthop hash + // keys on (vrf, addr) without iface_id, so it would collide with + // the route nexthop that FRR installs right after. 
+ if (add) { + const struct ethaddr *mac = dplane_ctx_neigh_get_mac(ctx); + gr_log_debug("cache rmac vrf=%u %pIA %pEA", vrf_id, addr, mac); + l3vni_rmac_set(vrf_id, addr->ipaddr_v4.s_addr, mac); + } else { + gr_log_debug("uncache rmac vrf=%u %pIA", vrf_id, addr); + l3vni_rmac_del(vrf_id, addr->ipaddr_v4.s_addr); + } + + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + enum zebra_dplane_result grout_vxlan_flood_update_ctx(struct zebra_dplane_ctx *ctx) { const struct ipaddr *addr = dplane_ctx_neigh_get_ipaddr(ctx); bool add = dplane_ctx_get_op(ctx) == DPLANE_OP_VTEP_ADD; diff --git a/frr/rt_grout.h b/frr/rt_grout.h index d58167cdb..c876e8ef9 100644 --- a/frr/rt_grout.h +++ b/frr/rt_grout.h @@ -18,4 +18,5 @@ void grout_nexthop_change(bool new, struct gr_nexthop *gr_nh, bool startup); void grout_macfdb_change(const struct gr_fdb_entry *fdb, bool new); enum zebra_dplane_result grout_macfdb_update_ctx(struct zebra_dplane_ctx *ctx); +enum zebra_dplane_result grout_neigh_update_ctx(struct zebra_dplane_ctx *ctx); enum zebra_dplane_result grout_vxlan_flood_update_ctx(struct zebra_dplane_ctx *ctx); diff --git a/frr/zebra_dplane_grout.c b/frr/zebra_dplane_grout.c index eac6b6d2b..9b4d0ea91 100644 --- a/frr/zebra_dplane_grout.c +++ b/frr/zebra_dplane_grout.c @@ -676,6 +676,11 @@ static enum zebra_dplane_result zd_grout_process_update(struct zebra_dplane_ctx case DPLANE_OP_MAC_DELETE: return grout_macfdb_update_ctx(ctx); + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + return grout_neigh_update_ctx(ctx); + case DPLANE_OP_VTEP_ADD: case DPLANE_OP_VTEP_DELETE: return grout_vxlan_flood_update_ctx(ctx); From 9cb705cd3b55ad13e8907d6cbc78ace8ee0479bd Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 11:19:21 +0100 Subject: [PATCH 08/19] smoke: add EVPN L3VPN test Verify EVPN type-5 IP prefix route exchange and L3 connectivity over VXLAN between FRR+grout and a standalone FRR+Linux peer. 
Each side has a VRF with an L3 VNI (1000) and a host on a local subnet. BGP EVPN advertises connected prefixes as type-5 routes. Grout uses a VXLAN in VRF mode (no bridge needed). The peer uses the standard Linux bridge+SVI model. The test verifies L3VNI recognition, type-5 route exchange, route installation in the VRF, RMAC presence on the route nexthop, and end-to-end ping through the overlay. Signed-off-by: Robin Jarry --- smoke/evpn_l3vpn_frr_test.sh | 254 +++++++++++++++++++++++++++++++++++ 1 file changed, 254 insertions(+) create mode 100755 smoke/evpn_l3vpn_frr_test.sh diff --git a/smoke/evpn_l3vpn_frr_test.sh b/smoke/evpn_l3vpn_frr_test.sh new file mode 100755 index 000000000..aa4960a30 --- /dev/null +++ b/smoke/evpn_l3vpn_frr_test.sh @@ -0,0 +1,254 @@ +#!/bin/bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +# This test verifies EVPN Type-5 (IP prefix) L3VPN connectivity using symmetric +# IRB (Integrated Routing and Bridging) over VXLAN between FRR+Grout and +# a standalone FRR+Linux peer. +# +# Each side has a VRF with an L3 VNI (1000) and a host connected to a local +# port. BGP EVPN advertises IP prefixes (type-5 routes) and RMAC entries +# (type-2 routes with GR_NH_F_REMOTE nexthops) across the VXLAN overlay. +# +# Success criteria: +# - Both sides exchange EVPN type-5 routes (IP prefixes installed). +# - Host-A and Host-B can ping each other through the L3 VXLAN overlay. +# - RMACs are installed as remote nexthops on the grout side. +# +# .-------------------------------. .-----------------------------. +# | evpn-peer | | grout | +# | | | | +# | .- - - - - - - . | | .- - - - - - - . | +# | ' vrf tenant ' | | ' vrf tenant ' | +# | ' ' | | ' ' | +# | ' +-------+ ' | | ' ' | +# | ' | br-l3 | ' | | ' ' | +# | ' +---+---+ ' | | ' ' | +# | ' | ' | | ' ' | +# | ' +----+-----+ ' | | ' +----------+ ' | +# | ' | vxlan-l3 |........... | | ..........| vxlan-l3 | ' | +# | ' +----------+ ' . | | . ' +----------+ ' | +# | ' ' . 
| | . ' ' | +# | ' .1 ' . | | . ' .1 ' | +# | ' +------+ ' .1 | | .2 ' +-------+ ' | +# | ' | p1 | ' +--------+ | | +------+ ' | p1 | ' | +# | ' +--+---+ ' | x-p0 | | | | p0 | ' +---+---+ ' | +# | '- - - |- - - -' +---+----+ | | +--+---+ '- - - |- - - -' | +# '--------|---------------|------' '----|--------------|---------' +# | | | | +# | | <------- BGP ----> | | +# 16.0.0.0/24 '---------------------' 48.0.0.0/24 +# | underlay | +# .-------|-----------. 172.16.0.0/24 .----------|--------. +# | +---+----+ | | +---+----+ | +# | | x-p1 | | | | x-p1 | | +# | +--------+ | <= = = = = = = = = = = = => | +--------+ | +# | .2 | overlay L3VPN | .2 | +# | | | | +# | host-a | | host-b | +# '-------------------' '-------------------' + +. $(dirname $0)/_init_frr.sh + +# right side (grout) ----------------------------------------------------------- +create_interface p0 +set_ip_address p0 172.16.0.2/24 + +# left side (Linux peer) ------------------------------------------------------- +start_frr evpn-peer + +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.forwarding=1 +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.rp_filter=0 +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.default.rp_filter=0 + +move_to_netns x-p0 evpn-peer +ip -n evpn-peer addr add 172.16.0.1/24 dev x-p0 + +# Create L3VNI VXLAN on the Linux peer with a bridge+SVI (required by Linux) +ip -n evpn-peer link add br-l3 type bridge +ip -n evpn-peer link set br-l3 up + +ip -n evpn-peer link add vxlan-l3 type vxlan id 1000 local 172.16.0.1 dstport 4789 nolearning +ip -n evpn-peer link set vxlan-l3 master br-l3 +ip -n evpn-peer link set vxlan-l3 up + +# Create VRF "tenant" on the peer and bind the L3VNI bridge as SVI +ip -n evpn-peer link add tenant type vrf table 10 +ip -n evpn-peer link set tenant up +ip -n evpn-peer link set br-l3 master tenant + +# Host-facing port in the peer VRF +ip -n evpn-peer link add p1 type veth peer name x-p1 +ip -n evpn-peer link set p1 master tenant +ip -n evpn-peer link set 
p1 up +ip -n evpn-peer addr add 16.0.0.1/24 dev p1 + +netns_add host-a +ip -n evpn-peer link set x-p1 netns host-a +ip -n host-a link set x-p1 up +ip -n host-a addr add 16.0.0.2/24 dev x-p1 +ip -n host-a route add default via 16.0.0.1 + +# FRR config on the Linux peer +vtysh -N evpn-peer <<-EOF +configure terminal + +vrf tenant + vni 1000 +exit-vrf + +router bgp 65000 + bgp router-id 172.16.0.1 + no bgp default ipv4-unicast + + neighbor 172.16.0.2 remote-as 65000 + + address-family l2vpn evpn + neighbor 172.16.0.2 activate + advertise-all-vni + exit-address-family +exit + +router bgp 65000 vrf tenant + bgp router-id 172.16.0.1 + + address-family ipv4 unicast + redistribute connected + exit-address-family + + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +exit +EOF + +# right side (grout) setup L3VPN ----------------------------------------------- +create_vrf tenant + +# L3 VNI VXLAN in VRF mode (no bridge needed in grout) +grcli interface add vxlan vxlan-l3 vni 1000 local 172.16.0.2 vrf tenant + +create_interface p1 vrf tenant +set_ip_address p1 48.0.0.1/24 + +netns_add host-b +move_to_netns x-p1 host-b +ip -n host-b addr add 48.0.0.2/24 dev x-p1 +ip -n host-b route add default via 48.0.0.1 + +# FRR config on grout +vtysh <<-EOF +configure terminal + +vrf tenant + vni 1000 +exit-vrf + +router bgp 65000 + bgp router-id 172.16.0.2 + no bgp default ipv4-unicast + + neighbor 172.16.0.1 remote-as 65000 + + address-family l2vpn evpn + neighbor 172.16.0.1 activate + advertise-all-vni + exit-address-family +exit + +router bgp 65000 vrf tenant + bgp router-id 172.16.0.2 + + address-family ipv4 unicast + redistribute connected + exit-address-family + + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +exit +EOF + +# -- Check L3VNI is recognized by both sides ----------------------------------- +attempts=0 +while ! 
vtysh -c "show evpn vni 1000" | grep -qF "L3"; do + if [ "$attempts" -ge 5 ]; then + vtysh -c "show evpn vni" + fail "Grout FRR does not recognize VNI 1000 as L3VNI" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +attempts=0 +while ! vtysh -N evpn-peer -c "show evpn vni 1000" | grep -qF "L3"; do + if [ "$attempts" -ge 5 ]; then + vtysh -N evpn-peer -c "show evpn vni" + fail "Linux peer does not recognize VNI 1000 as L3VNI" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Wait for EVPN type-5 route exchange --------------------------------------- +attempts=0 +while ! vtysh -c "show bgp l2vpn evpn route type 5" | grep -qF "16.0.0.0"; do + if [ "$attempts" -ge 5 ]; then + vtysh -c "show bgp l2vpn evpn route type 5" + fail "Grout FRR did not learn type-5 route for 16.0.0.0/24" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +attempts=0 +while ! vtysh -N evpn-peer -c "show bgp l2vpn evpn route type 5" | grep -qF "48.0.0.0"; do + if [ "$attempts" -ge 5 ]; then + vtysh -c "show bgp vrf tenant ipv4 unicast" + vtysh -c "show bgp l2vpn evpn route" + vtysh -N evpn-peer -c "show bgp l2vpn evpn route type 5" + fail "Linux peer did not learn type-5 route for 48.0.0.0/24" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Wait for routes to be installed in VRF ------------------------------------ +attempts=0 +while ! grcli -j route show vrf tenant | jq -e '.[] | select(.destination == "16.0.0.0/24")'; do + if [ "$attempts" -ge 5 ]; then + grcli route show vrf tenant + fail "Route 16.0.0.0/24 not installed in grout VRF tenant" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +attempts=0 +while ! ip -n evpn-peer route show vrf tenant | grep -qF "48.0.0.0/24"; do + if [ "$attempts" -ge 5 ]; then + ip -n evpn-peer route show vrf tenant + fail "Route 48.0.0.0/24 not installed in peer VRF tenant" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Check RMAC is set on the route nexthop ------------------------------------ +attempts=0 +while ! 
grcli nexthop show | grep -q "172.16.0.1.*remote"; do + if [ "$attempts" -ge 10 ]; then + grcli nexthop show + fail "Remote RMAC not set on route nexthop" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +vtysh -c "show bgp l2vpn evpn route type 5" +grcli route show vrf tenant +grcli nexthop show vrf tenant + +# -- Verify L3 connectivity through VXLAN overlay ------------------------------ +ip netns exec host-b ping -i0.1 -c3 -W1 16.0.0.2 +ip netns exec host-a ping -i0.1 -c3 -W1 48.0.0.2 From c13c81729424f5a2b81692e54140daf65bd2ea6a Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 19:40:07 +0200 Subject: [PATCH 09/19] ip,ip6: allow learning neighbors from unsolicited ARP/NDP Add a per-interface flag to control neighbor snooping. Configurable via grcli: interface set port p0 neigh_snoop on/off. When GR_IFACE_F_NEIGH_SNOOP is enabled on an interface, forward unsolicited ARP replies and Neighbor Advertisements to the control plane instead of dropping them. The existing arp_probe_input_cb and ndp_probe_input_cb callbacks already handle creating nexthops for unknown senders, so no control plane changes are needed. This allows grout to passively learn IP+MAC bindings from ARP/NDP traffic observed on an interface, similar to Linux's arp_accept sysctl. Signed-off-by: Robin Jarry --- modules/infra/api/gr_infra.h | 1 + modules/infra/cli/cli_iface.h | 7 ++++++- modules/infra/cli/iface.c | 9 +++++++++ modules/ip/datapath/arp_input_reply.c | 2 +- modules/ip6/datapath/ndp_na_input.c | 2 +- 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/infra/api/gr_infra.h b/modules/infra/api/gr_infra.h index b249597fd..3d562ea71 100644 --- a/modules/infra/api/gr_infra.h +++ b/modules/infra/api/gr_infra.h @@ -34,6 +34,7 @@ typedef enum : uint16_t { GR_IFACE_F_PACKET_TRACE = GR_BIT16(2), GR_IFACE_F_SNAT_STATIC = GR_BIT16(3), GR_IFACE_F_SNAT_DYNAMIC = GR_BIT16(4), + GR_IFACE_F_NEIGH_SNOOP = GR_BIT16(5), } gr_iface_flags_t; // Interface state flags. 
diff --git a/modules/infra/cli/cli_iface.h b/modules/infra/cli/cli_iface.h index 4bb12d677..10baa868f 100644 --- a/modules/infra/cli/cli_iface.h +++ b/modules/infra/cli/cli_iface.h @@ -74,7 +74,8 @@ int arg_iface( CLI_CONTEXT(root, INTERFACE_ARG, CTX_ARG("set", "Modify an existing interface.")) #define IFACE_ATTRS_CMD \ - "(up|down),(promisc PROMISC),(mtu MTU),((vrf VRF)|(domain DOMAIN)),(description DESCR)" + "(up|down),(promisc PROMISC),(neigh_snoop NEIGH_SNOOP),(mtu MTU)," \ + "((vrf VRF)|(domain DOMAIN)),(description DESCR)" #define IFACE_ATTRS_ARGS \ with_help("Set the interface UP.", ec_node_str("up", "up")), \ @@ -83,6 +84,10 @@ int arg_iface( EC_NODE_OR("PROMISC", ec_node_str("", "on"), ec_node_str("", "off")) \ ), \ with_help("Set the interface DOWN.", ec_node_str("down", "down")), \ + with_help( \ + "Enable/disable neighbor snooping (learn from unsolicited ARP/NDP).", \ + EC_NODE_OR("NEIGH_SNOOP", ec_node_str("", "on"), ec_node_str("", "off")) \ + ), \ with_help( \ "Maximum transmission unit size.", \ ec_node_uint("MTU", 1280, UINT16_MAX - 1, 10) \ diff --git a/modules/infra/cli/iface.c b/modules/infra/cli/iface.c index d18c37e19..a728e8908 100644 --- a/modules/infra/cli/iface.c +++ b/modules/infra/cli/iface.c @@ -261,6 +261,15 @@ uint64_t parse_iface_args( set_attrs |= GR_IFACE_SET_FLAGS; } + const char *neigh_snoop = arg_str(p, "NEIGH_SNOOP"); + if (neigh_snoop != NULL && strcmp(neigh_snoop, "on") == 0) { + iface->flags |= GR_IFACE_F_NEIGH_SNOOP; + set_attrs |= GR_IFACE_SET_FLAGS; + } else if (neigh_snoop != NULL && strcmp(neigh_snoop, "off") == 0) { + iface->flags &= ~GR_IFACE_F_NEIGH_SNOOP; + set_attrs |= GR_IFACE_SET_FLAGS; + } + if (arg_u16(p, "MTU", &iface->mtu) == 0) set_attrs |= GR_IFACE_SET_MTU; diff --git a/modules/ip/datapath/arp_input_reply.c b/modules/ip/datapath/arp_input_reply.c index 365c20164..191338cbe 100644 --- a/modules/ip/datapath/arp_input_reply.c +++ b/modules/ip/datapath/arp_input_reply.c @@ -37,7 +37,7 @@ static uint16_t 
arp_input_reply_process( iface = mbuf_data(mbuf)->iface; remote = nh4_lookup(iface->vrf_id, arp->arp_data.arp_sip); - if (remote != NULL) { + if (remote != NULL || (iface->flags & GR_IFACE_F_NEIGH_SNOOP)) { control_output_set_cb(mbuf, arp_probe_input_cb, 0); rte_node_enqueue_x1(graph, node, CONTROL, mbuf); } else { diff --git a/modules/ip6/datapath/ndp_na_input.c b/modules/ip6/datapath/ndp_na_input.c index b4e33b610..8fbe3a14c 100644 --- a/modules/ip6/datapath/ndp_na_input.c +++ b/modules/ip6/datapath/ndp_na_input.c @@ -77,7 +77,7 @@ static uint16_t ndp_na_input_process( // recipient has apparently not initiated any communication with the // target. remote = nh6_lookup(iface->vrf_id, iface->id, &na->target); - if (remote == NULL) { + if (remote == NULL && !(iface->flags & GR_IFACE_F_NEIGH_SNOOP)) { edge = DROP; goto next; } From 0f80959978e6f05033889674beeee2c15b8c5d15 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 19:42:00 +0200 Subject: [PATCH 10/19] nexthop: add GR_NH_ORIGIN_NEIGH for learned neighbors ARP/NDP-learned nexthops were created with GR_NH_ORIGIN_INTERNAL which suppresses event generation. Add GR_NH_ORIGIN_NEIGH so these nexthops produce GR_EVENT_NEXTHOP_NEW and GR_EVENT_NEXTHOP_UPDATE events that the FRR plugin can receive and translate into neighbor entries for zebra. The associated host routes (/32, /128) remain GR_NH_ORIGIN_INTERNAL since they are internal plumbing and should not be visible to FRR. 
Signed-off-by: Robin Jarry --- modules/infra/api/gr_nexthop.h | 3 +++ modules/infra/control/nexthop.c | 5 +++-- modules/ip/control/nexthop.c | 2 +- modules/ip6/control/nexthop.c | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/modules/infra/api/gr_nexthop.h b/modules/infra/api/gr_nexthop.h index bdf2b8e54..b48a35a98 100644 --- a/modules/infra/api/gr_nexthop.h +++ b/modules/infra/api/gr_nexthop.h @@ -75,6 +75,7 @@ typedef enum : uint8_t { GR_NH_ORIGIN_ZSTATIC = 196, // (RTPROT_ZSTATIC from zebra) GR_NH_ORIGIN_OPENFABRIC = 197, // (RTPROT_OPENFABIC from zebra) GR_NH_ORIGIN_SRTE = 198, // (RTPROT_SRTE from zebra) + GR_NH_ORIGIN_NEIGH = 254, // Learned from ARP/NDP traffic. GR_NH_ORIGIN_INTERNAL = 255, // Reserved for internal use (no events, no ID allocation). } gr_nh_origin_t; @@ -256,6 +257,8 @@ static inline const char *gr_nh_origin_name(gr_nh_origin_t origin) { return "openfabric"; case GR_NH_ORIGIN_SRTE: return "srte"; + case GR_NH_ORIGIN_NEIGH: + return "neigh"; case GR_NH_ORIGIN_INTERNAL: return "INTERNAL"; } diff --git a/modules/infra/control/nexthop.c b/modules/infra/control/nexthop.c index 7cd67828b..64ceccc02 100644 --- a/modules/infra/control/nexthop.c +++ b/modules/infra/control/nexthop.c @@ -100,8 +100,8 @@ static void nexthop_id_put(struct nexthop *nh) { static int nexthop_id_get(struct nexthop *nh) { int ret; - // no id for internal, as we should not let user manipulate it - if (nh->origin == GR_NH_ORIGIN_INTERNAL) { + // no id for internal/neigh, as we should not let user manipulate them + if (nh->origin == GR_NH_ORIGIN_INTERNAL || nh->origin == GR_NH_ORIGIN_NEIGH) { nh->nh_id = 0; return 0; } @@ -271,6 +271,7 @@ bool nexthop_origin_valid(gr_nh_origin_t origin) { case GR_NH_ORIGIN_ZSTATIC: case GR_NH_ORIGIN_OPENFABRIC: case GR_NH_ORIGIN_SRTE: + case GR_NH_ORIGIN_NEIGH: case GR_NH_ORIGIN_INTERNAL: return true; } diff --git a/modules/ip/control/nexthop.c b/modules/ip/control/nexthop.c index c9006146c..721a69a0d 100644 --- 
a/modules/ip/control/nexthop.c +++ b/modules/ip/control/nexthop.c @@ -162,7 +162,7 @@ void arp_probe_input_cb(void *obj, uintptr_t, const struct control_queue_drain * nh = nexthop_new( &(struct gr_nexthop_base) { .type = GR_NH_T_L3, - .origin = GR_NH_ORIGIN_INTERNAL, + .origin = GR_NH_ORIGIN_NEIGH, .iface_id = iface->id, .vrf_id = iface->vrf_id, }, diff --git a/modules/ip6/control/nexthop.c b/modules/ip6/control/nexthop.c index 2012bda1d..416f34dad 100644 --- a/modules/ip6/control/nexthop.c +++ b/modules/ip6/control/nexthop.c @@ -201,7 +201,7 @@ void ndp_probe_input_cb(void *obj, uintptr_t, const struct control_queue_drain * .type = GR_NH_T_L3, .iface_id = iface->id, .vrf_id = iface->vrf_id, - .origin = GR_NH_ORIGIN_INTERNAL, + .origin = GR_NH_ORIGIN_NEIGH, }, &(struct gr_nexthop_info_l3) { .af = GR_AF_IP6, From c64cbabe439616905ec5ee7cb9199aea98093f19 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 21:50:56 +0200 Subject: [PATCH 11/19] cli: add nexthop flush command Add GR_NH_FLUSH API request and grcli command to delete all nexthops matching a given origin. This is useful for clearing ARP/NDP-learned neighbors (origin neigh) which don't have IDs. Signed-off-by: Robin Jarry --- modules/infra/api/gr_nexthop.h | 9 +++++++++ modules/infra/api/nexthop.c | 24 +++++++++++++++++++++++ modules/infra/cli/nexthop.c | 36 ++++++++++++++++++++++++++++++++++ 3 files changed, 69 insertions(+) diff --git a/modules/infra/api/gr_nexthop.h b/modules/infra/api/gr_nexthop.h index b48a35a98..d470426e5 100644 --- a/modules/infra/api/gr_nexthop.h +++ b/modules/infra/api/gr_nexthop.h @@ -339,6 +339,15 @@ struct gr_nh_list_req { STREAM_RESP(struct gr_nexthop); +// Flush nexthops matching the given origin. +#define GR_NH_FLUSH REQUEST_TYPE(GR_INFRA_MODULE, 0x0075) + +struct gr_nh_flush_req { + gr_nh_origin_t origin; +}; + +// struct gr_nh_flush_resp { }; + // Get a single nexthop by ID. 
#define GR_NH_GET REQUEST_TYPE(GR_INFRA_MODULE, 0x0074) diff --git a/modules/infra/api/nexthop.c b/modules/infra/api/nexthop.c index fad53e6a7..b4e1cfccc 100644 --- a/modules/infra/api/nexthop.c +++ b/modules/infra/api/nexthop.c @@ -127,6 +127,29 @@ static struct api_out nh_get(const void *request, struct api_ctx *) { return api_out(0, len, pub); } +static struct api_out nh_flush(const void *request, struct api_ctx *) { + const struct gr_nh_flush_req *req = request; + struct nexthop *nh = NULL; + + for (;;) { + nh = nexthop_next(nh); + if (nh == NULL) + break; + if (nh->origin != req->origin) + continue; + if (nh->type == GR_NH_T_L3) { + struct nexthop_info_l3 *l3 = nexthop_info_l3(nh); + if ((l3->flags & NH_LOCAL_ADDR_FLAGS) == NH_LOCAL_ADDR_FLAGS) + continue; + } + nexthop_routes_cleanup(nh); + while (nh->ref_count > 0) + nexthop_decref(nh); + } + + return api_out(0, 0, NULL); +} + RTE_INIT(_init) { api_handler(GR_NH_CONFIG_GET, nh_config_get); api_handler(GR_NH_CONFIG_SET, nh_config_set); @@ -134,4 +157,5 @@ RTE_INIT(_init) { api_handler(GR_NH_DEL, nh_del); api_handler(GR_NH_LIST, nh_list); api_handler(GR_NH_GET, nh_get); + api_handler(GR_NH_FLUSH, nh_flush); } diff --git a/modules/infra/cli/nexthop.c b/modules/infra/cli/nexthop.c index 0e5625eb5..89dbeb213 100644 --- a/modules/infra/cli/nexthop.c +++ b/modules/infra/cli/nexthop.c @@ -430,6 +430,26 @@ static cmd_status_t nh_del(struct gr_api_client *c, const struct ec_pnode *p) { return CMD_SUCCESS; } +static cmd_status_t nh_flush(struct gr_api_client *c, const struct ec_pnode *p) { + struct gr_nh_flush_req req = {}; + const char *origin = arg_str(p, "ORIGIN"); + + if (origin == NULL) + return CMD_ERROR; + + if (strcmp(origin, "neigh") == 0) + req.origin = GR_NH_ORIGIN_NEIGH; + else if (strcmp(origin, "static") == 0) + req.origin = GR_NH_ORIGIN_STATIC; + else + return CMD_ERROR; + + if (gr_api_client_send_recv(c, GR_NH_FLUSH, sizeof(req), &req, NULL) < 0) + return CMD_ERROR; + + return CMD_SUCCESS; +} + static 
cmd_status_t nh_group_add(struct gr_api_client *c, const struct ec_pnode *p) { struct gr_nexthop_info_group *group; struct gr_nh_add_req *req = NULL; @@ -673,6 +693,22 @@ static int ctx_init(struct ec_node *root) { "Delete a next hop.", with_help("Nexthop ID.", ec_node_uint("ID", 1, UINT32_MAX - 1, 10)) ); + if (ret < 0) + return ret; + ret = CLI_COMMAND( + NEXTHOP_CTX(root), + "flush origin ORIGIN", + nh_flush, + "Flush all nexthops with the given origin.", + with_help( + "Nexthop origin.", + EC_NODE_OR( + "ORIGIN", + ec_node_str("neigh", "neigh"), + ec_node_str("static", "static") + ) + ) + ); if (ret < 0) return ret; ret = CLI_COMMAND( From f817fbd13ccf52870901dc60aa37acbf74d795d3 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 21:40:49 +0200 Subject: [PATCH 12/19] nexthop: allow deleting by key instead of ID Change GR_NH_DEL to accept a full struct gr_nexthop instead of just an ID. The handler uses nexthop_lookup() which first tries the ID if specified or falls back to the type-specific key-based lookup (e.g. vrf+addr for L3). This will allow the FRR plugin to delete remote nexthops installed from EVPN neighbor entries without tracking their auto-allocated IDs. 
Signed-off-by: Robin Jarry --- frr/rt_grout.c | 2 +- modules/infra/api/gr_nexthop.h | 2 +- modules/infra/api/nexthop.c | 2 +- modules/infra/cli/nexthop.c | 2 +- modules/infra/control/nexthop.c | 9 ++++----- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/frr/rt_grout.c b/frr/rt_grout.c index 38e587a7e..21916e468 100644 --- a/frr/rt_grout.c +++ b/frr/rt_grout.c @@ -609,7 +609,7 @@ static enum zebra_dplane_result grout_add_nexthop_group(struct zebra_dplane_ctx static enum zebra_dplane_result grout_del_nexthop(uint32_t nh_id) { gr_log_debug("nh_id %u", nh_id); - struct gr_nh_del_req req = {.missing_ok = true, .nh_id = nh_id}; + struct gr_nh_del_req req = {.missing_ok = true, .nh = {.nh_id = nh_id}}; if (grout_client_send_recv(GR_NH_DEL, sizeof(req), &req, NULL) < 0) return ZEBRA_DPLANE_REQUEST_FAILURE; diff --git a/modules/infra/api/gr_nexthop.h b/modules/infra/api/gr_nexthop.h index d470426e5..6ee391c55 100644 --- a/modules/infra/api/gr_nexthop.h +++ b/modules/infra/api/gr_nexthop.h @@ -321,8 +321,8 @@ struct gr_nh_add_req { #define GR_NH_DEL REQUEST_TYPE(GR_INFRA_MODULE, 0x0072) struct gr_nh_del_req { - uint32_t nh_id; uint8_t missing_ok; + struct gr_nexthop nh; }; // struct gr_nh_del_resp { }; diff --git a/modules/infra/api/nexthop.c b/modules/infra/api/nexthop.c index b4e1cfccc..df6b3e599 100644 --- a/modules/infra/api/nexthop.c +++ b/modules/infra/api/nexthop.c @@ -53,7 +53,7 @@ static struct api_out nh_del(const void *request, struct api_ctx *) { const struct gr_nh_del_req *req = request; struct nexthop *nh; - nh = nexthop_lookup_id(req->nh_id); + nh = nexthop_lookup(&req->nh.base, req->nh.info); if (nh == NULL) { if (req->missing_ok) return api_out(0, 0, NULL); diff --git a/modules/infra/cli/nexthop.c b/modules/infra/cli/nexthop.c index 89dbeb213..916449d7a 100644 --- a/modules/infra/cli/nexthop.c +++ b/modules/infra/cli/nexthop.c @@ -421,7 +421,7 @@ static cmd_status_t nh_blackhole_add(struct gr_api_client *c, const struct ec_pn static 
cmd_status_t nh_del(struct gr_api_client *c, const struct ec_pnode *p) { struct gr_nh_del_req req = {.missing_ok = true}; - if (arg_u32(p, "ID", &req.nh_id) < 0) + if (arg_u32(p, "ID", &req.nh.nh_id) < 0) return CMD_ERROR; if (gr_api_client_send_recv(c, GR_NH_DEL, sizeof(req), &req, NULL) < 0) diff --git a/modules/infra/control/nexthop.c b/modules/infra/control/nexthop.c index 64ceccc02..8ab2b7f79 100644 --- a/modules/infra/control/nexthop.c +++ b/modules/infra/control/nexthop.c @@ -290,19 +290,18 @@ void nexthop_type_ops_register(gr_nh_type_t type, const struct nexthop_type_ops struct nexthop *nexthop_lookup(const struct gr_nexthop_base *base, const void *info) { const struct nexthop_type_ops *ops; - struct nexthop *nh = NULL; if (base == NULL) return errno_set_null(EINVAL); if (base->nh_id != GR_NH_ID_UNSET) - nh = nexthop_lookup_id(base->nh_id); + return nexthop_lookup_id(base->nh_id); ops = type_ops[base->type]; - if (nh == NULL && ops != NULL && ops->lookup != NULL) - nh = ops->lookup(base, info); + if (ops != NULL && ops->lookup != NULL) + return ops->lookup(base, info); - return nh; + return errno_set_null(ENOENT); } struct nexthop *nexthop_new(const struct gr_nexthop_base *base, const void *info) { From ad0da9b4952b226bad1db8a1daf1cc5e58ad1650 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 19:43:38 +0200 Subject: [PATCH 13/19] frr: sync resolved nexthops as neighbor entries to zebra When a grout nexthop event arrives for an L3 nexthop with a resolved MAC, push a DPLANE_OP_NEIGH_INSTALL to zebra. On deletion, push DPLANE_OP_NEIGH_DELETE. This makes FRR aware of locally learned neighbors so it can advertise them as Type-2 MAC+IP routes via BGP EVPN. The notification is called before the nh_id check in grout_nexthop_change so that nexthops with nh_id=0 (auto-allocated by the ARP/NDP control plane) still produce neighbor entries in zebra even though they are not registered as zebra nexthop groups. 
Signed-off-by: Robin Jarry --- frr/rt_grout.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/frr/rt_grout.c b/frr/rt_grout.c index 21916e468..296fbaec2 100644 --- a/frr/rt_grout.c +++ b/frr/rt_grout.c @@ -828,6 +828,56 @@ enum zebra_dplane_result grout_add_del_nexthop(struct zebra_dplane_ctx *ctx) { return grout_add_nexthop(nh_id, origin, dplane_ctx_get_nhe_ng(ctx)->nexthop); } +static void grout_neigh_notify(bool new, struct gr_nexthop *gr_nh) { + const struct gr_nexthop_info_l3 *l3; + static const struct ethaddr zero_mac = {}; + struct zebra_dplane_ctx *ctx; + struct ethaddr mac; + struct ipaddr ip; + + if (gr_nh->type != GR_NH_T_L3) + return; + + l3 = (const struct gr_nexthop_info_l3 *)gr_nh->info; + + if (l3->af != GR_AF_IP4 && l3->af != GR_AF_IP6) + return; + if (l3->flags & (GR_NH_F_LOCAL | GR_NH_F_LINK | GR_NH_F_MCAST | GR_NH_F_REMOTE)) + return; + if (new && memcmp(&l3->mac, &zero_mac, sizeof(zero_mac)) == 0) + return; + + memset(&ip, 0, sizeof(ip)); + if (l3->af == GR_AF_IP4) { + ip.ipa_type = IPADDR_V4; + memcpy(&ip.ipaddr_v4, &l3->ipv4, sizeof(ip.ipaddr_v4)); + } else { + ip.ipa_type = IPADDR_V6; + memcpy(&ip.ipaddr_v6, &l3->ipv6, sizeof(ip.ipaddr_v6)); + } + memcpy(&mac, &l3->mac, sizeof(mac)); + + gr_log_debug( + "%s neigh iface=%u %pIA %pEA", new ? "add" : "del", gr_nh->iface_id, &ip, &mac + ); + + ctx = dplane_ctx_alloc(); + dplane_ctx_set_op(ctx, new ? DPLANE_OP_NEIGH_INSTALL : DPLANE_OP_NEIGH_DELETE); + dplane_ctx_set_ns_id(ctx, GROUT_NS); + dplane_ctx_set_ifindex(ctx, ifindex_grout_to_frr(gr_nh->iface_id)); + dplane_ctx_neigh_set_ipaddr(ctx, &ip); + dplane_ctx_neigh_set_mac(ctx, &mac); + dplane_ctx_neigh_set_ndm_state(ctx, NUD_REACHABLE); + dplane_ctx_neigh_set_ndm_family(ctx, l3->af == GR_AF_IP4 ? 
AF_INET : AF_INET6); + dplane_ctx_neigh_set_l2_len(ctx, ETH_ALEN); + dplane_ctx_neigh_set_is_ext(ctx, false); + dplane_ctx_neigh_set_is_router(ctx, false); + dplane_ctx_neigh_set_dp_static(ctx, false); + dplane_ctx_neigh_set_local_inactive(ctx, false); + + dplane_provider_enqueue_to_zebra(ctx); +} + void grout_nexthop_change(bool new, struct gr_nexthop *gr_nh, bool startup) { struct nexthop *nh = NULL; afi_t afi = AFI_UNSPEC; @@ -835,6 +885,8 @@ void grout_nexthop_change(bool new, struct gr_nexthop *gr_nh, bool startup) { gr_log_debug("%s nh_id %u", new ? "add" : "del", gr_nh->nh_id); + grout_neigh_notify(new, gr_nh); + // XXX: grout is optional to have an ID for nexthop // but in FRR, it's mandatory if (gr_nh->nh_id == 0) { From 5b9d1fbfd102b69e0c36e6acb2917fe1d314b465 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 19:44:25 +0200 Subject: [PATCH 14/19] frr: install nexthops from EVPN neighbor entries When the dplane plugin receives DPLANE_OP_NEIGH_INSTALL for a remote MAC/IP (from EVPN Type-2 routes), install the entry as a grout nexthop with GR_NH_F_REMOTE. The iface_id is resolved from the FRR ifindex which points to the bridge (SVI) interface. This allows the bridge suppress node to look up the nexthop and answer ARP/ND requests locally. Both IPv4 and IPv6 neighbors are supported. The existing L3VNI RMAC caching for Type-5 routes is preserved alongside. Signed-off-by: Robin Jarry --- frr/rt_grout.c | 114 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 99 insertions(+), 15 deletions(-) diff --git a/frr/rt_grout.c b/frr/rt_grout.c index 296fbaec2..a8b586dfe 100644 --- a/frr/rt_grout.c +++ b/frr/rt_grout.c @@ -1020,28 +1020,112 @@ enum zebra_dplane_result grout_macfdb_update_ctx(struct zebra_dplane_ctx *ctx) { return ret == 0 ? 
ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE; } +static void neigh_install_nexthop(struct zebra_dplane_ctx *ctx, const struct ipaddr *addr) { + const struct ethaddr *mac = dplane_ctx_neigh_get_mac(ctx); + uint16_t iface_id = ifindex_frr_to_grout(dplane_ctx_get_ifindex(ctx)); + struct gr_nexthop_info_l3 *l3; + struct gr_nh_add_req *req; + size_t len; + + if (iface_id == GR_IFACE_ID_UNDEF) + return; + + len = sizeof(*req) + sizeof(*l3); + req = calloc(1, len); + if (req == NULL) { + gr_log_err("calloc: %s", strerror(errno)); + return; + } + + req->exist_ok = true; + req->nh.type = GR_NH_T_L3; + req->nh.origin = GR_NH_ORIGIN_BGP; + req->nh.iface_id = iface_id; + l3 = (struct gr_nexthop_info_l3 *)req->nh.info; + l3->flags = GR_NH_F_REMOTE; + + switch (addr->ipa_type) { + case IPADDR_V4: + l3->af = GR_AF_IP4; + memcpy(&l3->ipv4, &addr->ipaddr_v4, sizeof(l3->ipv4)); + break; + case IPADDR_V6: + l3->af = GR_AF_IP6; + memcpy(&l3->ipv6, &addr->ipaddr_v6, sizeof(l3->ipv6)); + break; + default: + free(req); + return; + } + memcpy(&l3->mac, mac, sizeof(l3->mac)); + + gr_log_debug("install remote nh iface=%u %pIA %pEA", iface_id, addr, mac); + grout_client_send_recv(GR_NH_ADD, len, req, NULL); + free(req); +} + +static void neigh_delete_nexthop(struct zebra_dplane_ctx *ctx, const struct ipaddr *addr) { + uint16_t iface_id = ifindex_frr_to_grout(dplane_ctx_get_ifindex(ctx)); + struct gr_nexthop_info_l3 *l3; + struct gr_nh_del_req *req; + size_t len; + + if (iface_id == GR_IFACE_ID_UNDEF) + return; + + len = sizeof(*req) + sizeof(*l3); + req = calloc(1, len); + if (req == NULL) { + gr_log_err("calloc: %s", strerror(errno)); + return; + } + + req->missing_ok = true; + req->nh.type = GR_NH_T_L3; + req->nh.iface_id = iface_id; + l3 = (struct gr_nexthop_info_l3 *)req->nh.info; + + switch (addr->ipa_type) { + case IPADDR_V4: + l3->af = GR_AF_IP4; + memcpy(&l3->ipv4, &addr->ipaddr_v4, sizeof(l3->ipv4)); + break; + case IPADDR_V6: + l3->af = GR_AF_IP6; + memcpy(&l3->ipv6, 
&addr->ipaddr_v6, sizeof(l3->ipv6)); + break; + default: + free(req); + return; + } + + gr_log_debug("delete remote nh iface=%u %pIA", iface_id, addr); + grout_client_send_recv(GR_NH_DEL, len, req, NULL); + free(req); +} + enum zebra_dplane_result grout_neigh_update_ctx(struct zebra_dplane_ctx *ctx) { const struct ipaddr *addr = dplane_ctx_neigh_get_ipaddr(ctx); bool add = dplane_ctx_get_op(ctx) != DPLANE_OP_NEIGH_DELETE; uint16_t vrf_id = vrf_frr_to_grout(dplane_ctx_get_vrf(ctx)); - if (addr->ipa_type != IPADDR_V4) { - gr_log_debug("only IPv4 VTEP addresses supported, skip"); - return ZEBRA_DPLANE_REQUEST_SUCCESS; + // Cache the RMAC for L3VNI routes (IPv4 VTEP addresses only). + if (addr->ipa_type == IPADDR_V4) { + if (add) { + const struct ethaddr *mac = dplane_ctx_neigh_get_mac(ctx); + gr_log_debug("cache rmac vrf=%u %pIA %pEA", vrf_id, addr, mac); + l3vni_rmac_set(vrf_id, addr->ipaddr_v4.s_addr, mac); + } else { + gr_log_debug("uncache rmac vrf=%u %pIA", vrf_id, addr); + l3vni_rmac_del(vrf_id, addr->ipaddr_v4.s_addr); + } } - // Cache the RMAC for later use by grout_add_nexthop. We cannot - // create a separate nexthop here because grout's L3 nexthop hash - // keys on (vrf, addr) without iface_id, so it would collide with - // the route nexthop that FRR installs right after. - if (add) { - const struct ethaddr *mac = dplane_ctx_neigh_get_mac(ctx); - gr_log_debug("cache rmac vrf=%u %pIA %pEA", vrf_id, addr, mac); - l3vni_rmac_set(vrf_id, addr->ipaddr_v4.s_addr, mac); - } else { - gr_log_debug("uncache rmac vrf=%u %pIA", vrf_id, addr); - l3vni_rmac_del(vrf_id, addr->ipaddr_v4.s_addr); - } + // Install/remove a remote nexthop for ARP/ND suppression. 
+ if (add) + neigh_install_nexthop(ctx, addr); + else + neigh_delete_nexthop(ctx, addr); return ZEBRA_DPLANE_REQUEST_SUCCESS; } From e777913e2f5f8380a0b965ced1279baedc453e76 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 23:13:10 +0200 Subject: [PATCH 15/19] bridge: homogenize flag names All interface/nexthop flags use "presence" semantics. Having flags to signify the absence of a feature is confusing. Change the semantics of bridge flags to follow the same style. Signed-off-by: Robin Jarry --- modules/l2/api/gr_l2.h | 4 +-- modules/l2/cli/bridge.c | 56 +++++++++++++++--------------- modules/l2/datapath/bridge_input.c | 5 ++- 3 files changed, 32 insertions(+), 33 deletions(-) diff --git a/modules/l2/api/gr_l2.h b/modules/l2/api/gr_l2.h index 54a9ef352..07e1c4e8d 100644 --- a/modules/l2/api/gr_l2.h +++ b/modules/l2/api/gr_l2.h @@ -14,8 +14,8 @@ // Bridge configuration flags. typedef enum : uint16_t { - GR_BRIDGE_F_NO_FLOOD = GR_BIT16(0), - GR_BRIDGE_F_NO_LEARN = GR_BIT16(1), + GR_BRIDGE_F_FLOOD = GR_BIT16(0), // Flood BUM traffic. + GR_BRIDGE_F_LEARN = GR_BIT16(1), // Dynamic MAC learning in FDB. } gr_bridge_flags_t; #define GR_BRIDGE_MAX_MEMBERS 64 diff --git a/modules/l2/cli/bridge.c b/modules/l2/cli/bridge.c index 6a4c134e6..5f9639e4f 100644 --- a/modules/l2/cli/bridge.c +++ b/modules/l2/cli/bridge.c @@ -12,6 +12,7 @@ #include #include +#include #include static void @@ -22,9 +23,9 @@ bridge_show(struct gr_api_client *c, const struct gr_iface *iface, struct gr_obj o, "bridge_flags", GR_DISP_STR_ARRAY, - "%sflood %slearn", - (bridge->flags & GR_BRIDGE_F_NO_FLOOD) ? "no_" : "", - (bridge->flags & GR_BRIDGE_F_NO_LEARN) ? "no_" : "" + "flood %s learn %s", + (bridge->flags & GR_BRIDGE_F_FLOOD) ? "on" : "off", + (bridge->flags & GR_BRIDGE_F_LEARN) ? 
"on" : "off" ); gr_object_field(o, "ageing_time", GR_DISP_INT, "%u", bridge->ageing_time); gr_object_field(o, "mac", 0, ETH_F, &bridge->mac); @@ -40,10 +41,10 @@ bridge_list_info(struct gr_api_client *, const struct gr_iface *iface, char *buf snprintf( buf, len, - "members=%u %sflood %slearn", + "members=%u flood %s learn %s", bridge->n_members, - (bridge->flags & GR_BRIDGE_F_NO_FLOOD) ? "no_" : "", - (bridge->flags & GR_BRIDGE_F_NO_LEARN) ? "no_" : "" + (bridge->flags & GR_BRIDGE_F_FLOOD) ? "on" : "off", + (bridge->flags & GR_BRIDGE_F_LEARN) ? "on" : "off" ); } @@ -64,18 +65,21 @@ static uint64_t parse_bridge_args( set_attrs = parse_iface_args(c, p, iface, sizeof(*bridge), update); - if (arg_str(p, "flood")) { - bridge->flags &= ~GR_BRIDGE_F_NO_FLOOD; + const char *on_off = arg_str(p, "FLOOD"); + if (on_off != NULL && strcmp(on_off, "on") == 0) { + bridge->flags |= GR_BRIDGE_F_FLOOD; set_attrs |= GR_BRIDGE_SET_FLAGS; - } else if (arg_str(p, "no_flood")) { - bridge->flags |= GR_BRIDGE_F_NO_FLOOD; + } else if (on_off != NULL && strcmp(on_off, "off") == 0) { + bridge->flags &= ~GR_BRIDGE_F_FLOOD; set_attrs |= GR_BRIDGE_SET_FLAGS; } - if (arg_str(p, "learn")) { - bridge->flags &= ~GR_BRIDGE_F_NO_LEARN; + + on_off = arg_str(p, "LEARN"); + if (on_off != NULL && strcmp(on_off, "on") == 0) { + bridge->flags |= GR_BRIDGE_F_LEARN; set_attrs |= GR_BRIDGE_SET_FLAGS; - } else if (arg_str(p, "no_learn")) { - bridge->flags |= GR_BRIDGE_F_NO_LEARN; + } else if (on_off != NULL && strcmp(on_off, "off") == 0) { + bridge->flags &= ~GR_BRIDGE_F_LEARN; set_attrs |= GR_BRIDGE_SET_FLAGS; } @@ -98,6 +102,7 @@ static uint64_t parse_bridge_args( static cmd_status_t bridge_add(struct gr_api_client *c, const struct ec_pnode *p) { const struct gr_iface_add_resp *resp; struct gr_iface_add_req *req = NULL; + struct gr_iface_info_bridge *br; void *resp_ptr = NULL; size_t len; @@ -107,6 +112,8 @@ static cmd_status_t bridge_add(struct gr_api_client *c, const struct ec_pnode *p req->iface.type = 
GR_IFACE_TYPE_BRIDGE; req->iface.flags = GR_IFACE_F_UP; + br = PAYLOAD(req); + br->flags = GR_BRIDGE_F_FLOOD | GR_BRIDGE_F_LEARN; if (parse_bridge_args(c, p, &req->iface, false) == 0) goto err; @@ -145,7 +152,7 @@ static cmd_status_t bridge_set(struct gr_api_client *c, const struct ec_pnode *p return ret; } -#define BRIDGE_ATTRS_CMD IFACE_ATTRS_CMD ",(ageing_time AGE),(mac MAC),FLOOD,LEARN" +#define BRIDGE_ATTRS_CMD IFACE_ATTRS_CMD ",(ageing_time AGE),(mac MAC),(flood FLOOD),(learn LEARN)" #define BRIDGE_ATTRS_ARGS \ IFACE_ATTRS_ARGS, \ @@ -154,20 +161,13 @@ static cmd_status_t bridge_set(struct gr_api_client *c, const struct ec_pnode *p ec_node_uint("AGE", 0, UINT16_MAX, 10) \ ), \ with_help("Bridge ethernet address.", ec_node_re("MAC", ETH_ADDR_RE)), \ - EC_NODE_OR( \ - "FLOOD", \ - with_help( \ - "Enable flooding of BUM traffic.", ec_node_str("flood", "flood") \ - ), \ - with_help( \ - "Disable flooding of BUM traffic.", \ - ec_node_str("no_flood", "no_flood") \ - ) \ + with_help( \ + "Enable/disable flooding of BUM traffic.", \ + EC_NODE_OR("FLOOD", ec_node_str("", "on"), ec_node_str("", "off")) \ ), \ - EC_NODE_OR( \ - "LEARN", \ - with_help("Enable MAC learning.", ec_node_str("learn", "learn")), \ - with_help("Disable MAC learning.", ec_node_str("no_learn", "no_learn")) \ + with_help( \ + "Enable/disable dynamic MAC learning.", \ + EC_NODE_OR("LEARN", ec_node_str("", "on"), ec_node_str("", "off")) \ ) static int ctx_init(struct ec_node *root) { diff --git a/modules/l2/datapath/bridge_input.c b/modules/l2/datapath/bridge_input.c index a7326ff41..32fb0f01a 100644 --- a/modules/l2/datapath/bridge_input.c +++ b/modules/l2/datapath/bridge_input.c @@ -59,8 +59,7 @@ static uint16_t bridge_input_process( } br = iface_info_bridge(bridge); - if (rte_is_unicast_ether_addr(ð->src_addr) - && !(br->flags & GR_BRIDGE_F_NO_LEARN)) { + if (rte_is_unicast_ether_addr(ð->src_addr) && (br->flags & GR_BRIDGE_F_LEARN)) { vtep = (d->iface->type == GR_IFACE_TYPE_VXLAN) ? 
d->vtep : 0; fdb_learn(bridge->id, d->iface->id, ð->src_addr, d->vlan_id, vtep); } @@ -96,7 +95,7 @@ static uint16_t bridge_input_process( edge = FLOOD; } next: - if (edge == FLOOD && (br->flags & GR_BRIDGE_F_NO_FLOOD)) + if (edge == FLOOD && !(br->flags & GR_BRIDGE_F_FLOOD)) edge = FLOOD_DISABLED; rte_node_enqueue_x1(graph, node, edge, m); From 8c10a9beee458960962f4ad1e316fe43c9ff63a0 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 23:11:52 +0200 Subject: [PATCH 16/19] api: generalize gr_flags_foreach This macro will be useful to format all flag masks. Signed-off-by: Robin Jarry --- .clang-format | 2 +- api/gr_macro.h | 5 +++++ modules/infra/api/gr_nexthop.h | 7 +------ modules/infra/cli/nexthop.c | 2 +- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/.clang-format b/.clang-format index fcd12eea7..031c9b940 100644 --- a/.clang-format +++ b/.clang-format @@ -85,7 +85,7 @@ ForEachMacros: - gr_api_client_stream_foreach - vec_foreach - vec_foreach_ref - - gr_nh_flags_foreach + - gr_flags_foreach IfMacros: - KJ_IF_MAYBE IncludeBlocks: Regroup diff --git a/api/gr_macro.h b/api/gr_macro.h index f9225feda..8a3e3229e 100644 --- a/api/gr_macro.h +++ b/api/gr_macro.h @@ -5,6 +5,7 @@ #include #include +#include // Get number of elements in a static array. 
#define ARRAY_DIM(array) (sizeof(array) / sizeof(array[0])) @@ -48,3 +49,7 @@ #define GR_SYMBOL_FORBIDDEN(func, new_func) \ sorry_##func##_is_a_banned_function_use_##new_func##_instead + +#define gr_flags_foreach(f, flags) \ + for (uint64_t __bit = 0, f = 1ULL; __bit < sizeof(flags) * CHAR_BIT; f = 1ULL << ++__bit) \ + if (flags & f) diff --git a/modules/infra/api/gr_nexthop.h b/modules/infra/api/gr_nexthop.h index 6ee391c55..6ab0f0e83 100644 --- a/modules/infra/api/gr_nexthop.h +++ b/modules/infra/api/gr_nexthop.h @@ -129,11 +129,6 @@ typedef enum { GR_EVENT_NEXTHOP_UPDATE = EVENT_TYPE(GR_INFRA_MODULE, 0x0102), } gr_event_nexthop_t; -#define gr_nh_flags_foreach(f, flags) \ - for (gr_nh_flags_t __i = 0, f = GR_BIT8(0); __i < sizeof(gr_nh_flags_t) * CHAR_BIT; \ - f = GR_BIT8(++__i)) \ - if (flags & f) - // Convert nexthop state enum to string representation. static inline const char *gr_nh_state_name(const gr_nh_state_t state) { switch (state) { @@ -152,7 +147,7 @@ static inline const char *gr_nh_state_name(const gr_nh_state_t state) { } // Convert nexthop flag enum to string representation. -// For flag masks, iterate individual flags using gr_nh_flags_foreach. +// For flag masks, iterate individual flags using gr_flags_foreach. 
static inline const char *gr_nh_flag_name(const gr_nh_flags_t flag) { switch (flag) { case GR_NH_F_STATIC: diff --git a/modules/infra/cli/nexthop.c b/modules/infra/cli/nexthop.c index 916449d7a..0b537abce 100644 --- a/modules/infra/cli/nexthop.c +++ b/modules/infra/cli/nexthop.c @@ -124,7 +124,7 @@ static void add_columns_l3(struct gr_table *table) { static void format_nh_flags(char *buf, size_t len, gr_nh_flags_t flags) { ssize_t n = 0; buf[0] = 0; - gr_nh_flags_foreach (fl, flags) { + gr_flags_foreach (fl, flags) { if (n > 0) SAFE_BUF(snprintf, len, " "); SAFE_BUF(snprintf, len, "%s", gr_nh_flag_name(fl)); From acd1a54cd9e2a265ade5d50733f4be5b558dd8e7 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 23:19:16 +0200 Subject: [PATCH 17/19] l2: add bridge and fdb flag names in API header Use gr_flags_foreach to simplify formatting them. Signed-off-by: Robin Jarry --- modules/l2/api/gr_l2.h | 26 ++++++++++++++++++++++++++ modules/l2/cli/bridge.c | 30 ++++++++++++++++-------------- modules/l2/cli/fdb.c | 12 ++++++------ 3 files changed, 48 insertions(+), 20 deletions(-) diff --git a/modules/l2/api/gr_l2.h b/modules/l2/api/gr_l2.h index 07e1c4e8d..223e96b27 100644 --- a/modules/l2/api/gr_l2.h +++ b/modules/l2/api/gr_l2.h @@ -18,6 +18,18 @@ typedef enum : uint16_t { GR_BRIDGE_F_LEARN = GR_BIT16(1), // Dynamic MAC learning in FDB. } gr_bridge_flags_t; +// Convert bridge flag enum to string representation. +// For flag masks, iterate individual flags using gr_flags_foreach. +static inline const char *gr_bridge_flag_name(gr_bridge_flags_t f) { + switch (f) { + case GR_BRIDGE_F_FLOOD: + return "flood"; + case GR_BRIDGE_F_LEARN: + return "learn"; + } + return "?"; +} + #define GR_BRIDGE_MAX_MEMBERS 64 #define GR_BRIDGE_DEFAULT_AGEING 300 @@ -66,6 +78,20 @@ typedef enum : uint8_t { GR_FDB_F_EXTERN = GR_BIT8(2), // Programmed by external control plane. } gr_fdb_flags_t; +// Convert FDB flag enum to string representation. 
+// For flag masks, iterate individual flags using gr_flags_foreach. +static inline const char *gr_fdb_flag_name(gr_fdb_flags_t f) { + switch (f) { + case GR_FDB_F_STATIC: + return "static"; + case GR_FDB_F_LEARN: + return "learn"; + case GR_FDB_F_EXTERN: + return "extern"; + } + return "?"; +} + // Forwarding database entry associating a MAC+VLAN to a bridge member interface. struct gr_fdb_entry { uint16_t bridge_id; diff --git a/modules/l2/cli/bridge.c b/modules/l2/cli/bridge.c index 5f9639e4f..d0228e681 100644 --- a/modules/l2/cli/bridge.c +++ b/modules/l2/cli/bridge.c @@ -15,17 +15,26 @@ #include #include +static const char *format_bridge_flags(gr_bridge_flags_t flags) { + static char buf[128]; // grcli is single threaded, this is safe + size_t n = 0; + buf[0] = 0; + + gr_flags_foreach (f, flags) { + if (n > 0) + SAFE_BUF(snprintf, sizeof(buf), " "); + SAFE_BUF(snprintf, sizeof(buf), "%s", gr_bridge_flag_name(f)); + } +err: + return buf; +} + static void bridge_show(struct gr_api_client *c, const struct gr_iface *iface, struct gr_object *o) { const struct gr_iface_info_bridge *bridge = PAYLOAD(iface); gr_object_field( - o, - "bridge_flags", - GR_DISP_STR_ARRAY, - "flood %s learn %s", - (bridge->flags & GR_BRIDGE_F_FLOOD) ? "on" : "off", - (bridge->flags & GR_BRIDGE_F_LEARN) ? "on" : "off" + o, "bridge_flags", GR_DISP_STR_ARRAY, "%s", format_bridge_flags(bridge->flags) ); gr_object_field(o, "ageing_time", GR_DISP_INT, "%u", bridge->ageing_time); gr_object_field(o, "mac", 0, ETH_F, &bridge->mac); @@ -38,14 +47,7 @@ bridge_show(struct gr_api_client *c, const struct gr_iface *iface, struct gr_obj static void bridge_list_info(struct gr_api_client *, const struct gr_iface *iface, char *buf, size_t len) { const struct gr_iface_info_bridge *bridge = PAYLOAD(iface); - snprintf( - buf, - len, - "members=%u flood %s learn %s", - bridge->n_members, - (bridge->flags & GR_BRIDGE_F_FLOOD) ? "on" : "off", - (bridge->flags & GR_BRIDGE_F_LEARN) ? 
"on" : "off" - ); + snprintf(buf, len, "members=%u %s", bridge->n_members, format_bridge_flags(bridge->flags)); } static struct cli_iface_type bridge_type = { diff --git a/modules/l2/cli/fdb.c b/modules/l2/cli/fdb.c index a3e50a328..5ba88b829 100644 --- a/modules/l2/cli/fdb.c +++ b/modules/l2/cli/fdb.c @@ -79,12 +79,12 @@ static cmd_status_t fdb_flush(struct gr_api_client *c, const struct ec_pnode *p) static size_t fdb_format_flags(char *buf, size_t len, gr_fdb_flags_t flags) { size_t n = 0; buf[0] = 0; - if (flags & GR_FDB_F_LEARN) - SAFE_BUF(snprintf, len, "%slearn", n ? " " : ""); - if (flags & GR_FDB_F_STATIC) - SAFE_BUF(snprintf, len, "%sstatic", n ? " " : ""); - if (flags & GR_FDB_F_EXTERN) - SAFE_BUF(snprintf, len, "%sextern", n ? " " : ""); + + gr_flags_foreach (f, flags) { + if (n > 0) + SAFE_BUF(snprintf, len, " "); + SAFE_BUF(snprintf, len, "%s", gr_fdb_flag_name(f)); + } err: return n; } From 724dad55705b4dcac3f1f678537e149dc383b166 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 1 Apr 2026 19:45:21 +0200 Subject: [PATCH 18/19] l2: add ARP/ND suppression in bridges Add flag to allow enabling ARP/ND suppression on bridge interfaces. When a bridge has GR_BRIDGE_F_NEIGH_SUPPRESS enabled, broadcast and multicast traffic is routed through bridge_neigh_suppress before flooding. The node intercepts ARP requests and IPv6 Neighbor Solicitations, looks up the target IP in the bridge's VRF nexthop table, and replies on behalf of known remote hosts (GR_NH_F_REMOTE with a reachable MAC). For ARP, the Ethernet and ARP headers are rewritten in-place. For ND, the packet is rebuilt from scratch since a Neighbor Advertisement has a different structure than a Neighbor Solicitation. In both cases, the reply is sent directly to iface_output, bypassing eth_output to avoid double Ethernet header prepending. Packets that cannot be suppressed (unknown target, non-ARP/ND, or non-remote nexthop) fall through to bridge_flood as usual. 
Also add a "remote" flag to the nexthop add l3 CLI command so remote nexthops can be configured manually for testing. Signed-off-by: Robin Jarry --- modules/l2/api/gr_l2.h | 3 + modules/l2/cli/bridge.c | 19 +- modules/l2/datapath/bridge_input.c | 5 +- modules/l2/datapath/bridge_neigh_suppress.c | 217 ++++++++++++++++++++ modules/l2/datapath/meson.build | 1 + 5 files changed, 242 insertions(+), 3 deletions(-) create mode 100644 modules/l2/datapath/bridge_neigh_suppress.c diff --git a/modules/l2/api/gr_l2.h b/modules/l2/api/gr_l2.h index 223e96b27..df8def59d 100644 --- a/modules/l2/api/gr_l2.h +++ b/modules/l2/api/gr_l2.h @@ -16,6 +16,7 @@ typedef enum : uint16_t { GR_BRIDGE_F_FLOOD = GR_BIT16(0), // Flood BUM traffic. GR_BRIDGE_F_LEARN = GR_BIT16(1), // Dynamic MAC learning in FDB. + GR_BRIDGE_F_NEIGH_SUPPRESS = GR_BIT16(2), // Intercept ARP/NDP requests. } gr_bridge_flags_t; // Convert bridge flag enum to string representation. @@ -26,6 +27,8 @@ static inline const char *gr_bridge_flag_name(gr_bridge_flags_t f) { return "flood"; case GR_BRIDGE_F_LEARN: return "learn"; + case GR_BRIDGE_F_NEIGH_SUPPRESS: + return "neigh_suppress"; } return "?"; } diff --git a/modules/l2/cli/bridge.c b/modules/l2/cli/bridge.c index d0228e681..96272743f 100644 --- a/modules/l2/cli/bridge.c +++ b/modules/l2/cli/bridge.c @@ -84,6 +84,14 @@ static uint64_t parse_bridge_args( bridge->flags &= ~GR_BRIDGE_F_LEARN; set_attrs |= GR_BRIDGE_SET_FLAGS; } + on_off = arg_str(p, "neigh_suppress"); + if (on_off != NULL && strcmp(on_off, "on") == 0) { + bridge->flags |= GR_BRIDGE_F_NEIGH_SUPPRESS; + set_attrs |= GR_BRIDGE_SET_FLAGS; + } else if (on_off != NULL && strcmp(on_off, "off") == 0) { + bridge->flags &= ~GR_BRIDGE_F_NEIGH_SUPPRESS; + set_attrs |= GR_BRIDGE_SET_FLAGS; + } if (arg_u16(p, "AGE", &bridge->ageing_time) == 0) set_attrs |= GR_BRIDGE_SET_AGEING_TIME; @@ -154,8 +162,9 @@ static cmd_status_t bridge_set(struct gr_api_client *c, const struct ec_pnode *p return ret; } -#define 
BRIDGE_ATTRS_CMD IFACE_ATTRS_CMD ",(ageing_time AGE),(mac MAC),(flood FLOOD),(learn LEARN)" - +#define BRIDGE_ATTRS_CMD \ + IFACE_ATTRS_CMD ",(ageing_time AGE),(mac MAC)" \ + ",(flood FLOOD),(learn LEARN),(neigh_suppress NEIGH_SUPPRESS)" #define BRIDGE_ATTRS_ARGS \ IFACE_ATTRS_ARGS, \ with_help( \ @@ -167,6 +176,12 @@ static cmd_status_t bridge_set(struct gr_api_client *c, const struct ec_pnode *p "Enable/disable flooding of BUM traffic.", \ EC_NODE_OR("FLOOD", ec_node_str("", "on"), ec_node_str("", "off")) \ ), \ + with_help( \ + "Enable/disable ARP/NDP requests suppression.", \ + EC_NODE_OR( \ + "NEIGH_SUPPRESS", ec_node_str("", "on"), ec_node_str("", "off") \ + ) \ + ), \ with_help( \ "Enable/disable dynamic MAC learning.", \ EC_NODE_OR("LEARN", ec_node_str("", "on"), ec_node_str("", "off")) \ diff --git a/modules/l2/datapath/bridge_input.c b/modules/l2/datapath/bridge_input.c index 32fb0f01a..a83ba896f 100644 --- a/modules/l2/datapath/bridge_input.c +++ b/modules/l2/datapath/bridge_input.c @@ -13,6 +13,7 @@ enum edges { OUTPUT = 0, INPUT, FLOOD, + NEIGH_SUPPRESS, BRIDGE_INVAL, HAIRPIN, OUT_IFACE_INVAL, @@ -90,8 +91,9 @@ static uint16_t bridge_input_process( } else { edge = OUTPUT; } + } else if (br->flags & GR_BRIDGE_F_NEIGH_SUPPRESS) { + edge = NEIGH_SUPPRESS; } else { - // Broadcast, multicast edge = FLOOD; } next: @@ -130,6 +132,7 @@ static struct rte_node_register node = { [OUTPUT] = "iface_output", [INPUT] = "iface_input", [FLOOD] = "bridge_flood", + [NEIGH_SUPPRESS] = "bridge_neigh_suppress", [BRIDGE_INVAL] = "bridge_input_invalid_domain", [HAIRPIN] = "bridge_input_hairpin", [OUT_IFACE_INVAL] = "bridge_input_invalid_output", diff --git a/modules/l2/datapath/bridge_neigh_suppress.c b/modules/l2/datapath/bridge_neigh_suppress.c new file mode 100644 index 000000000..a154c7ac7 --- /dev/null +++ b/modules/l2/datapath/bridge_neigh_suppress.c @@ -0,0 +1,217 @@ +// SPDX-License-Identifier: BSD-3-Clause +// Copyright (c) 2026 Robin Jarry + +#include "icmp6.h" 
+#include "iface.h" +#include "ip4.h" +#include "ip6.h" +#include "ip6_datapath.h" +#include "nexthop.h" +#include "rxtx.h" + +#include +#include +#include +#include + +enum edges { + IFACE_OUTPUT = 0, + FLOOD, + DROP, + EDGE_COUNT, +}; + +static inline bool is_suppressable(const struct nexthop *nh) { + const struct nexthop_info_l3 *l3; + + if (nh == NULL || nh->type != GR_NH_T_L3) + return false; + + l3 = nexthop_info_l3(nh); + if (!(l3->flags & GR_NH_F_REMOTE)) + return false; + if (l3->state != GR_NH_S_REACHABLE) + return false; + if (rte_is_zero_ether_addr(&l3->mac)) + return false; + + return true; +} + +static rte_edge_t +suppress_arp(struct rte_mbuf *m, struct rte_ether_hdr *eth, const struct iface *bridge) { + const struct nexthop_info_l3 *l3; + struct rte_ether_addr req_sha; + const struct nexthop *nh; + struct rte_arp_hdr *arp; + ip4_addr_t req_sip; + + if (rte_pktmbuf_pkt_len(m) < sizeof(*eth) + sizeof(*arp)) + return FLOOD; + + arp = PAYLOAD(eth); + if (arp->arp_opcode != RTE_BE16(RTE_ARP_OP_REQUEST)) + return FLOOD; + + nh = nh4_lookup(bridge->vrf_id, arp->arp_data.arp_tip); + if (!is_suppressable(nh)) + return FLOOD; + + l3 = nexthop_info_l3(nh); + + // Save requester info before overwriting. + req_sha = arp->arp_data.arp_sha; + req_sip = arp->arp_data.arp_sip; + + // Rewrite ARP payload in-place: request -> reply. + arp->arp_opcode = RTE_BE16(RTE_ARP_OP_REPLY); + arp->arp_data.arp_tha = req_sha; + arp->arp_data.arp_tip = req_sip; + arp->arp_data.arp_sha = l3->mac; + arp->arp_data.arp_sip = l3->ipv4; + + // Rewrite Ethernet header in-place. 
+ eth->dst_addr = req_sha; + eth->src_addr = l3->mac; + + return IFACE_OUTPUT; +} + +static rte_edge_t +suppress_nd(struct rte_mbuf *m, struct rte_ether_hdr *eth, const struct iface *bridge) { + const struct nexthop_info_l3 *l3; + struct icmp6_neigh_solicit *ns; + struct icmp6_neigh_advert *na; + struct rte_ether_addr req_mac; + struct icmp6_opt_lladdr *ll; + struct rte_ipv6_addr req_ip; + const struct nexthop *nh; + struct rte_ipv6_hdr *ip6; + struct icmp6_opt *opt; + uint16_t payload_len; + struct icmp6 *icmp6; + + if (rte_pktmbuf_pkt_len(m) < sizeof(*eth) + sizeof(*ip6) + sizeof(*icmp6) + sizeof(*ns)) + return FLOOD; + + ip6 = PAYLOAD(eth); + if (ip6->proto != IPPROTO_ICMPV6) + return FLOOD; + + icmp6 = PAYLOAD(ip6); + if (icmp6->type != ICMP6_TYPE_NEIGH_SOLICIT) + return FLOOD; + + ns = PAYLOAD(icmp6); + nh = nh6_lookup(bridge->vrf_id, GR_IFACE_ID_UNDEF, &ns->target); + if (!is_suppressable(nh)) + return FLOOD; + + l3 = nexthop_info_l3(nh); + + // Save requester info. + req_mac = eth->src_addr; + req_ip = ip6->src_addr; + + // Trim entire packet and rebuild NA from scratch. + rte_pktmbuf_trim(m, rte_pktmbuf_pkt_len(m)); + + payload_len = sizeof(*icmp6) + sizeof(*na) + sizeof(*opt) + sizeof(*ll); + eth = (struct rte_ether_hdr *) + rte_pktmbuf_append(m, sizeof(*eth) + sizeof(*ip6) + payload_len); + if (eth == NULL) + return DROP; + + // Ethernet header. + eth->dst_addr = req_mac; + eth->src_addr = l3->mac; + eth->ether_type = RTE_BE16(RTE_ETHER_TYPE_IPV6); + + // IPv6 header. + ip6 = PAYLOAD(eth); + ip6_set_fields(ip6, payload_len, IPPROTO_ICMPV6, &l3->ipv6, &req_ip); + + // ICMPv6 NA. + icmp6 = PAYLOAD(ip6); + icmp6->type = ICMP6_TYPE_NEIGH_ADVERT; + icmp6->code = 0; + na = PAYLOAD(icmp6); + na->flags = ICMP6_NA_F_SOLICITED | ICMP6_NA_F_OVERRIDE; + na->__reserved = 0; + na->__reserved2 = 0; + na->target = l3->ipv6; + + // Target link-layer address option. 
+ opt = PAYLOAD(na); + opt->type = ICMP6_OPT_TARGET_LLADDR; + opt->len = ICMP6_OPT_LEN(sizeof(*opt) + sizeof(*ll)); + ll = PAYLOAD(opt); + ll->mac = l3->mac; + + // Compute ICMPv6 checksum. + icmp6->cksum = 0; + icmp6->cksum = rte_ipv6_udptcp_cksum(ip6, icmp6); + + return IFACE_OUTPUT; +} + +static uint16_t bridge_neigh_suppress_process( + struct rte_graph *graph, + struct rte_node *node, + void **objs, + uint16_t nb_objs +) { + const struct iface *bridge; + struct iface_mbuf_data *d; + struct rte_ether_hdr *eth; + struct rte_mbuf *m; + rte_edge_t edge; + + for (uint16_t i = 0; i < nb_objs; i++) { + m = objs[i]; + d = iface_mbuf_data(m); + eth = rte_pktmbuf_mtod(m, struct rte_ether_hdr *); + + bridge = iface_from_id(d->iface->domain_id); + if (bridge == NULL) { + edge = DROP; + goto next; + } + + switch (eth->ether_type) { + case RTE_BE16(RTE_ETHER_TYPE_ARP): + edge = suppress_arp(m, eth, bridge); + break; + case RTE_BE16(RTE_ETHER_TYPE_IPV6): + edge = suppress_nd(m, eth, bridge); + break; + default: + edge = FLOOD; + break; + } +next: + rte_node_enqueue_x1(graph, node, edge, m); + } + + return nb_objs; +} + +static struct rte_node_register node = { + .name = "bridge_neigh_suppress", + .process = bridge_neigh_suppress_process, + .nb_edges = EDGE_COUNT, + .next_nodes = { + [IFACE_OUTPUT] = "iface_output", + [FLOOD] = "bridge_flood", + [DROP] = "bridge_neigh_suppress_drop", + }, +}; + +static struct gr_node_info info = { + .node = &node, + .type = GR_NODE_T_L2, +}; + +GR_NODE_REGISTER(info); + +GR_DROP_REGISTER(bridge_neigh_suppress_drop); diff --git a/modules/l2/datapath/meson.build b/modules/l2/datapath/meson.build index b6dc45fc5..6207085ea 100644 --- a/modules/l2/datapath/meson.build +++ b/modules/l2/datapath/meson.build @@ -4,6 +4,7 @@ src += files( 'bridge_flood.c', 'bridge_input.c', + 'bridge_neigh_suppress.c', 'vxlan_flood.c', 'vxlan_input.c', 'vxlan_output.c', From 96d65ccbc4613622ea61a37db695b40528b43b11 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Wed, 
1 Apr 2026 19:53:56 +0200 Subject: [PATCH 19/19] smoke: add ARP/ND suppression tests bridge_neigh_suppress_test verifies suppression with a manually configured remote nexthop: an ARP request for a known remote IP gets answered locally by grout with the correct MAC, and is not flooded to other bridge ports. evpn_neigh_suppress_frr_test verifies end-to-end suppression with FRR EVPN. The Linux peer has neigh_suppress on the VXLAN interface and advertise-svi-ip in BGP. Both sides have bridge SVIs and hosts with default routes through them. When hosts ARP for their gateways, VTEPs learn the IP+MAC bindings and advertise Type-2 MAC+IP routes. The remote VTEP installs the nexthops and the suppress node answers ARPs locally. Signed-off-by: Robin Jarry --- smoke/bridge_neigh_suppress_test.sh | 71 +++++++++ smoke/evpn_neigh_suppress_frr_test.sh | 205 ++++++++++++++++++++++++++ 2 files changed, 276 insertions(+) create mode 100755 smoke/bridge_neigh_suppress_test.sh create mode 100755 smoke/evpn_neigh_suppress_frr_test.sh diff --git a/smoke/bridge_neigh_suppress_test.sh b/smoke/bridge_neigh_suppress_test.sh new file mode 100755 index 000000000..784c7b274 --- /dev/null +++ b/smoke/bridge_neigh_suppress_test.sh @@ -0,0 +1,71 @@ +#!/bin/bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +# Verify ARP/ND suppression with manually configured remote nexthops. +# +# A bridge with neigh_suppress has two ports. A remote nexthop (with a known +# MAC) is installed for a target IP. When a host sends an ARP request for that +# IP, grout replies locally using the remote nexthop's MAC instead of flooding +# the request to the other port. +# +# .-------------. .-----------------------------. .-------------. 
+# | host-a | | grout | | host-b | +# | | | | | | +# | +-------+ | | +----+ +------+ +----+ | | +-------+ | +# | | x-p0 | | | | p0 +---+ br0 +--+ p1 | | | | x-p1 | | +# | +---+---+ | | +----+ +------+ +----+ | | +---+---+ | +# '------|------' '------|neigh_suppress|------' '------|------' +# | '----+---------' | +# 10.0.0.2 10.0.0.1 10.0.0.3 +# remote nh: 10.0.0.99 = de:ad:be:ef:00:01 + +. $(dirname $0)/_init.sh + +grcli interface add bridge br0 neigh_suppress on +grcli address add 10.0.0.1/24 iface br0 + +port_add p0 domain br0 +port_add p1 domain br0 + +# Install a remote nexthop for the suppressed IP. +grcli nexthop add l3 iface br0 address 10.0.0.99 mac de:ad:be:ef:00:01 remote + +netns_add host-a +move_to_netns x-p0 host-a +ip -n host-a addr add 10.0.0.2/24 dev x-p0 + +netns_add host-b +move_to_netns x-p1 host-b +ip -n host-b addr add 10.0.0.3/24 dev x-p1 + +# Wait for initial ARP/ND traffic to settle before testing suppression. +sleep 2 + +# Capture on host-b filtering only for ARP targeting 10.0.0.99. +ip netns exec host-b timeout 3 \ + tcpdump -c1 -t -nn -i x-p1 'arp and arp[24:4] = 0x0a000063' \ + >$tmp/tcpdump.out 2>/dev/null & +tcpdump_pid=$! +sleep 0.5 + +# Send an ARP request from host-a for the suppressed IP. +ip netns exec host-a arping -c1 -w1 -I x-p0 10.0.0.99 >$tmp/arping.out + +# Wait for tcpdump to finish. +wait $tcpdump_pid 2>/dev/null || true + +if grep -q ARP $tmp/tcpdump.out; then + cat $tmp/tcpdump.out + fail "ARP request for 10.0.0.99 was flooded to host-b (should have been suppressed)" +fi + +# Verify the suppressed ARP reply has the correct remote MAC. +if ! grep -qi 'DE:AD:BE:EF:00:01' $tmp/arping.out; then + cat $tmp/arping.out + fail "ARP reply did not contain expected MAC de:ad:be:ef:00:01" +fi + +# Verify normal L2 connectivity still works (non-suppressed traffic). 
+ip netns exec host-a ping -i0.01 -c3 -W1 -n 10.0.0.3 || fail "L2 ping host-a->host-b failed" +ip netns exec host-b ping -i0.01 -c3 -W1 -n 10.0.0.2 || fail "L2 ping host-b->host-a failed" diff --git a/smoke/evpn_neigh_suppress_frr_test.sh b/smoke/evpn_neigh_suppress_frr_test.sh new file mode 100755 index 000000000..271235527 --- /dev/null +++ b/smoke/evpn_neigh_suppress_frr_test.sh @@ -0,0 +1,205 @@ +#!/bin/bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +# Verify ARP suppression with FRR EVPN Type-2 MAC+IP routes. +# +# Both VTEPs have bridge SVIs. The Linux peer has neigh_suppress on the +# VXLAN interface and advertise-svi-ip in BGP. Grout has neigh_suppress on +# the bridge and neigh_snoop on the bridge to learn local neighbors. +# +# When hosts ARP for their gateways, the VTEPs learn the IP+MAC bindings +# and advertise them as Type-2 MAC+IP routes. Remote VTEPs install the +# neighbors and the suppress node answers ARP requests locally. +# +# .-------------------------. .-------------------------. +# | evpn-peer | | grout | +# | | | | +# | +----------+ | | +----------+ | +# | | vxlan100 | nh_suppr | | | vxlan100 | | +# | +----+-----+ | | +-----+----+ | +# | | | | | | +# | +---+---+ | | +---+---+ | +# | | br100 | | | nh_suppr | br100 | | +# | +---+---+ | | +---+---+ | +# | | .1 | | .2 | | +# | +---+---+ +-------+ | | +------+ +---+---+ | +# | | p1 | | x-p0 | | | | p0 | | p1 | | +# | +---+---+ +---+---+ | | +---+--+ +---+---+ | +# '------|------------|-----' '-----|------------|------' +# | | | | +# | | <----- BGP -----> | | +# .------|--------. `----------------------' .-------|------. +# | +---+----+ | underlay | +---+----+ | +# | | x-p1 | | 172.16.0.0/24 | | x-p1 | | +# | +--------+ | | +--------+ | +# | .2 | | .3 | +# | host-a | overlay | host-b | +# '---------------' 10.0.0.0/24 '--------------' + +. 
$(dirname $0)/_init_frr.sh + +# right side ------------------------------------------------------------------- +create_interface p0 +set_ip_address p0 172.16.0.2/24 + +# left side -------------------------------------------------------------------- +start_frr evpn-peer 0 + +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.forwarding=1 +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.rp_filter=0 +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.default.rp_filter=0 + +move_to_netns x-p0 evpn-peer +ip -n evpn-peer addr add 172.16.0.1/24 dev x-p0 + +ip -n evpn-peer link add br100 type bridge +ip -n evpn-peer link set br100 up +# linux needs an IP address in order to learn neighbors +ip -n evpn-peer addr add 10.0.0.4/24 dev br100 + +ip -n evpn-peer link add vxlan100 type vxlan id 100 local 172.16.0.1 dstport 4789 nolearning +ip -n evpn-peer link set vxlan100 master br100 +ip -n evpn-peer link set vxlan100 type bridge_slave neigh_suppress on learning off +ip -n evpn-peer link set vxlan100 up + +ip -n evpn-peer link add p1 type veth peer name x-p1 +ip -n evpn-peer link set p1 master br100 +ip -n evpn-peer link set p1 up + +netns_add host-a +ip -n evpn-peer link set x-p1 netns host-a +ip -n host-a link set x-p1 up +ip -n host-a addr add 10.0.0.2/24 dev x-p1 +ip -n host-a route add default via 10.0.0.4 + +# BGP EVPN on peer with advertise-svi-ip for Type-2 MAC+IP routes. +vtysh -N evpn-peer <<-EOF +configure terminal + +router bgp 65000 + bgp router-id 172.16.0.1 + no bgp default ipv4-unicast + + neighbor 172.16.0.2 remote-as 65000 + + address-family l2vpn evpn + neighbor 172.16.0.2 activate + advertise-all-vni + advertise-svi-ip + exit-address-family +exit +EOF + +# BGP EVPN on grout. 
+vtysh <<-EOF +configure terminal + +router bgp 65000 + bgp router-id 172.16.0.2 + no bgp default ipv4-unicast + + neighbor 172.16.0.1 remote-as 65000 + + address-family l2vpn evpn + neighbor 172.16.0.1 activate + advertise-all-vni + advertise-svi-ip + exit-address-family +exit +EOF + +# Wait for advertise-all-vni to take effect before creating bridge members. +attempts=0 +while ! vtysh -c "show evpn" | grep -q "L2 VNIs"; do + if [ "$attempts" -ge 10 ]; then + vtysh -c "show evpn" + fail "EVPN not enabled in zebra" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +grcli interface add bridge br100 neigh_suppress on +grcli address add 10.0.0.1/24 iface br100 +grcli interface set bridge br100 neigh_snoop on +grcli interface add vxlan vxlan100 vni 100 local 172.16.0.2 domain br100 + +attempts=0 +while ! vtysh -c "show evpn vni 100" | grep -q "VNI: 100"; do + if [ "$attempts" -ge 10 ]; then + vtysh -c "show evpn vni 100" + fail "zebra did not learn VNI 100" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +create_interface p1 domain br100 + +netns_add host-b +move_to_netns x-p1 host-b +ip -n host-b addr add 10.0.0.3/24 dev x-p1 +ip -n host-b route add default via 10.0.0.1 + +# -- Wait for EVPN type-3 (flood VTEP) exchange ------------------------------- +attempts=0 +while ! grcli -j flood vtep show | jq -e '.[] | select(.addr == "172.16.0.1")'; do + if [ "$attempts" -ge 10 ]; then + grcli flood vtep show + fail "Grout did not learn remote VTEP 172.16.0.1" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Trigger ARP learning on both sides --------------------------------------- +# Each host pings its gateway SVI. This makes the host ARP for the gateway, +# teaching the local VTEP the host's IP+MAC. FRR advertises the binding as +# a Type-2 MAC+IP route. +ip netns exec host-a ping -c1 -W1 10.0.0.4 || true +ip netns exec host-b ping -c1 -W1 10.0.0.1 || true + +# Also establish overlay connectivity. 
+ip netns exec host-b ping -i0.1 -c3 -W1 10.0.0.2 +ip netns exec host-a ping -i0.1 -c3 -W1 10.0.0.3 + +# -- Wait for Type-2 MAC+IP route from peer ----------------------------------- +mac_a=$(ip netns exec host-a cat /sys/class/net/x-p1/address) +attempts=0 +while ! vtysh -c "show bgp l2vpn evpn route type 2" | grep -qF "10.0.0.2"; do + if [ "$attempts" -ge 10 ]; then + vtysh -c "show bgp l2vpn evpn route type 2" + fail "No Type-2 MAC+IP route for 10.0.0.2" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Wait for remote nexthop to be installed via DPLANE_OP_NEIGH_INSTALL ------ +attempts=0 +while ! grcli nexthop show | grep -q "10.0.0.2.*remote"; do + if [ "$attempts" -ge 10 ]; then + grcli nexthop show + fail "Remote nexthop for 10.0.0.2 not installed" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Verify ARP suppression --------------------------------------------------- +# Flush host-b's ARP cache to force a new ARP request. +ip -n host-b neigh flush dev x-p1 + +# Send ARP from host-b. Grout should reply from its nexthop cache. +ip netns exec host-b arping -c1 -w2 -I x-p1 10.0.0.2 >$tmp/arping.out + +# Verify host-b received reply with the correct MAC. +if ! grep -qi "$mac_a" $tmp/arping.out; then + cat $tmp/arping.out + fail "ARP reply did not contain expected MAC $mac_a" +fi + +# -- Final connectivity check through overlay ---------------------------------- +ip netns exec host-b ping -i0.1 -c3 -W1 10.0.0.2 +ip netns exec host-a ping -i0.1 -c3 -W1 10.0.0.3