From 5e806273ae390ad51b49eb4708bbb2878c6d9d45 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sat, 21 Mar 2026 20:25:13 +0100 Subject: [PATCH 1/8] l4: add trace with L4 ports Add trace output showing source and destination ports for UDP and TCP packets in l4_input_local. Also parse TCP headers to extract ports before falling through to management plane forwarding. Signed-off-by: Robin Jarry --- modules/l4/l4_input_local.c | 43 +++++++++++++++++++++++++++++++------ 1 file changed, 37 insertions(+), 6 deletions(-) diff --git a/modules/l4/l4_input_local.c b/modules/l4/l4_input_local.c index 702bca16d..d9ebbb980 100644 --- a/modules/l4/l4_input_local.c +++ b/modules/l4/l4_input_local.c @@ -7,6 +7,7 @@ #include "l4.h" #include "log.h" +#include #include LOG_TYPE("graph"); @@ -62,13 +63,27 @@ int l4_input_unalias_port(uint8_t proto, rte_be16_t alias) { return 0; } +struct l4_trace_data { + rte_be16_t sport; + rte_be16_t dport; +}; + +static int trace_l4_format(char *buf, size_t len, const void *data, size_t /*data_len*/) { + const struct l4_trace_data *d = data; + return snprintf( + buf, len, "src=%u dst=%u", rte_be_to_cpu_16(d->sport), rte_be_to_cpu_16(d->dport) + ); +} + static uint16_t l4_input_local_process( struct rte_graph *graph, struct rte_node *node, void **objs, uint16_t nb_objs ) { - struct rte_udp_hdr *hdr; + struct rte_tcp_hdr *tcp; + struct rte_udp_hdr *udp; + rte_be16_t sport, dport; struct rte_mbuf *mbuf; rte_edge_t edge; uint8_t proto; @@ -76,6 +91,7 @@ static uint16_t l4_input_local_process( for (uint16_t i = 0; i < nb_objs; i++) { mbuf = objs[i]; edge = BAD_PROTO; + sport = dport = 0; if (mbuf->packet_type & RTE_PTYPE_L3_IPV4) proto = ip_local_mbuf_data(mbuf)->proto; @@ -84,14 +100,28 @@ static uint16_t l4_input_local_process( else goto next; - if (proto != IPPROTO_UDP) { + switch (proto) { + case IPPROTO_UDP: + udp = rte_pktmbuf_mtod(mbuf, struct rte_udp_hdr *); + sport = udp->src_port; + dport = udp->dst_port; + edge = udp_edges[udp->dst_port]; + 
break; + case IPPROTO_TCP: + tcp = rte_pktmbuf_mtod(mbuf, struct rte_tcp_hdr *); + sport = tcp->src_port; + dport = tcp->dst_port; + // fallthrough + default: edge = MANAGEMENT; - goto next; + break; } - - hdr = rte_pktmbuf_mtod(mbuf, struct rte_udp_hdr *); - edge = udp_edges[hdr->dst_port]; next: + if (gr_mbuf_is_traced(mbuf)) { + struct l4_trace_data *t = gr_mbuf_trace_add(mbuf, node, sizeof(*t)); + t->sport = sport; + t->dport = dport; + } rte_node_enqueue_x1(graph, node, edge, mbuf); } return nb_objs; @@ -117,6 +147,7 @@ static struct gr_node_info info = { .node = &input_node, .type = GR_NODE_T_L4, .register_callback = l4_input_local_register, + .trace_format = trace_l4_format, }; GR_NODE_REGISTER(info); From 32453bd5233cbc22c8e46d8efded34ef5563323a Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:17:01 +0100 Subject: [PATCH 2/8] vrf: add a random mac address on creation VRF interfaces now get a random MAC address at creation time. This MAC will be used as the Router MAC for EVPN L3VNI type-5 routes. Signed-off-by: Robin Jarry --- api/gr_api.h | 2 +- frr/if_grout.c | 3 +++ modules/infra/api/gr_infra.h | 2 ++ modules/infra/cli/vrf.c | 9 ++++++++- modules/infra/control/vrf.c | 18 ++++++++++++++++++ modules/ip6/control/address.c | 3 +++ 6 files changed, 35 insertions(+), 2 deletions(-) diff --git a/api/gr_api.h b/api/gr_api.h index a238d168c..0713d7bf9 100644 --- a/api/gr_api.h +++ b/api/gr_api.h @@ -11,7 +11,7 @@ #include // Must be bumped when making non-backward compatible changes in API headers -#define GR_API_VERSION 2 +#define GR_API_VERSION 3 // API request header. 
struct gr_api_request { diff --git a/frr/if_grout.c b/frr/if_grout.c index cf370044a..9b86d65b0 100644 --- a/frr/if_grout.c +++ b/frr/if_grout.c @@ -51,6 +51,7 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { const struct gr_iface_info_vlan *gr_vlan = NULL; const struct gr_iface_info_port *gr_port = NULL; const struct gr_iface_info_bond *gr_bond = NULL; + const struct gr_iface_info_vrf *gr_vrf = NULL; ifindex_t bridge_ifindex = IFINDEX_INTERNAL; ifindex_t link_ifindex = IFINDEX_INTERNAL; ifindex_t bond_ifindex = IFINDEX_INTERNAL; @@ -86,6 +87,8 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { link_type = ZEBRA_LLT_IPIP; break; case GR_IFACE_TYPE_VRF: + gr_vrf = (const struct gr_iface_info_vrf *)&gr_if->info; + mac = &gr_vrf->mac; link_type = ZEBRA_LLT_ETHER; zif_type = ZEBRA_IF_VRF; break; diff --git a/modules/infra/api/gr_infra.h b/modules/infra/api/gr_infra.h index f0895d65a..af3b637c8 100644 --- a/modules/infra/api/gr_infra.h +++ b/modules/infra/api/gr_infra.h @@ -127,6 +127,7 @@ struct gr_iface_info_port { // VRF reconfiguration attribute flags. #define GR_VRF_SET_FIB GR_BIT64(32) +#define GR_VRF_SET_MAC GR_BIT64(33) // Per-AF FIB configuration. struct gr_iface_info_vrf_fib { @@ -138,6 +139,7 @@ struct gr_iface_info_vrf_fib { struct gr_iface_info_vrf { struct gr_iface_info_vrf_fib ipv4; struct gr_iface_info_vrf_fib ipv6; + struct rte_ether_addr mac; // Used as Router MAC for EVPN L3VNI. }; // VLAN reconfiguration attribute flags. 
diff --git a/modules/infra/cli/vrf.c b/modules/infra/cli/vrf.c index ea7cf0876..a3a854016 100644 --- a/modules/infra/cli/vrf.c +++ b/modules/infra/cli/vrf.c @@ -10,17 +10,19 @@ #define VRF_ATTRS_CMD \ "(rib4-routes RIB4_ROUTES),(fib4-tbl8 FIB4_TBL8)" \ ",(rib6-routes RIB6_ROUTES),(fib6-tbl8 FIB6_TBL8)" \ - ",(description DESCR)" + ",(mac MAC),(description DESCR)" #define VRF_ATTRS_ARGS \ with_help("Max IPv4 routes.", ec_node_uint("RIB4_ROUTES", 1, UINT32_MAX, 10)), \ with_help("IPv4 TBL8 groups.", ec_node_uint("FIB4_TBL8", 1, UINT32_MAX, 10)), \ with_help("Max IPv6 routes.", ec_node_uint("RIB6_ROUTES", 1, UINT32_MAX, 10)), \ with_help("IPv6 TBL8 groups.", ec_node_uint("FIB6_TBL8", 1, UINT32_MAX, 10)), \ + with_help("Set the ethernet address.", ec_node_re("MAC", ETH_ADDR_RE)), \ with_help("Interface description.", ec_node("any", "DESCR")) static void vrf_show(struct gr_api_client *, const struct gr_iface *iface, struct gr_object *o) { const struct gr_iface_info_vrf *info = PAYLOAD(iface); + gr_object_field(o, "mac", 0, ETH_F, &info->mac); gr_object_field(o, "rib4_max_routes", GR_DISP_INT, "%u", info->ipv4.max_routes); gr_object_field(o, "fib4_num_tbl8", GR_DISP_INT, "%u", info->ipv4.num_tbl8); gr_object_field(o, "rib6_max_routes", GR_DISP_INT, "%u", info->ipv6.max_routes); @@ -80,6 +82,11 @@ static uint64_t parse_vrf_args( set_attrs |= GR_VRF_SET_FIB; } + if (arg_eth_addr(p, "MAC", &info->mac) == 0) + set_attrs |= GR_VRF_SET_MAC; + else if (errno != ENOENT) + return 0; + return set_attrs; } diff --git a/modules/infra/control/vrf.c b/modules/infra/control/vrf.c index 79a7cc665..df5522f61 100644 --- a/modules/infra/control/vrf.c +++ b/modules/infra/control/vrf.c @@ -151,6 +151,7 @@ static int netlink_vrf_add(const struct iface *iface) { strerror(errno)); return ret; } + netlink_link_set_mac(vrf->vrf_ifindex, &vrf->mac); } ret = netlink_add_route(iface->cp_id, table_id); @@ -229,6 +230,7 @@ static int iface_vrf_init(struct iface *iface, const void *api_info) { // 
VRF's vrf_id is its own iface_id (VRF identifier) iface->vrf_id = iface->id; vrf->ref_count = 0; + rte_eth_random_addr(vrf->mac.addr_bytes); if (iface_loopback_create(iface) < 0) return -errno; @@ -347,10 +349,24 @@ static int iface_vrf_reconfig( fib_conf->num_tbl8); } } + if (set_attrs & GR_VRF_SET_MAC && iface_set_eth_addr(iface, &info->mac) < 0) + return -errno; return 0; } +static int iface_vrf_get_eth_addr(const struct iface *iface, struct rte_ether_addr *mac) { + const struct iface_info_vrf *vrf = iface_info_vrf(iface); + *mac = vrf->mac; + return 0; +} + +static int iface_vrf_set_eth_addr(struct iface *iface, const struct rte_ether_addr *mac) { + struct iface_info_vrf *vrf = iface_info_vrf(iface); + vrf->mac = *mac; + return 0; +} + static void iface_vrf_to_api(void *info, const struct iface *iface) { const struct iface_info_vrf *vrf = iface_info_vrf(iface); struct gr_iface_info_vrf *api = info; @@ -364,6 +380,8 @@ static struct iface_type iface_type_vrf = { .priv_size = sizeof(struct iface_info_vrf), .init = iface_vrf_init, .reconfig = iface_vrf_reconfig, + .set_eth_addr = iface_vrf_set_eth_addr, + .get_eth_addr = iface_vrf_get_eth_addr, .fini = iface_vrf_fini, .to_api = iface_vrf_to_api, }; diff --git a/modules/ip6/control/address.c b/modules/ip6/control/address.c index a0fe9eb5d..4e53c3630 100644 --- a/modules/ip6/control/address.c +++ b/modules/ip6/control/address.c @@ -420,6 +420,9 @@ static void ip6_iface_llocal_init(const struct iface *iface) { struct rte_ether_addr mac; unsigned i; + if (iface->type == GR_IFACE_TYPE_VRF) + return; // VRF interfaces shouldn't have a link-local address + if (iface_get_eth_addr(iface, &mac) < 0) return; From ed4cdd1b8dfb431c8b78cf75a2e8640775bf0280 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:17:01 +0100 Subject: [PATCH 3/8] vxlan: inherit VRF MAC for L3VNI router MAC VXLAN interfaces now inherit the MAC address of their VTEP VRF (encap_vrf_id) instead of using a random one. 
Some EVPN endpoints make the assumption that there is a unique RMAC per VTEP. When a VXLAN interface is in VRF mode (or moves to one), also synchronize that MAC address to the VRF interface. This ensures the VXLAN interface's MAC matches the Router MAC that FRR advertises in EVPN type-5 routes, so incoming L3VPN packets pass eth_input's check. Signed-off-by: Robin Jarry --- modules/l2/control/vxlan.c | 27 ++++++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) diff --git a/modules/l2/control/vxlan.c b/modules/l2/control/vxlan.c index 316122e8f..e8b28a067 100644 --- a/modules/l2/control/vxlan.c +++ b/modules/l2/control/vxlan.c @@ -45,7 +45,7 @@ struct iface *vxlan_get_iface(rte_be32_t vni, uint16_t encap_vrf_id) { static int iface_vxlan_reconfig( struct iface *iface, uint64_t set_attrs, - const struct gr_iface *, + const struct gr_iface *conf, const void *api_info ) { struct iface_info_vxlan *cur = iface_info_vxlan(iface); @@ -135,10 +135,31 @@ static int iface_vxlan_reconfig( conf_done |= GR_VXLAN_SET_LOCAL; } - if (set_attrs & GR_VXLAN_SET_MAC) { - if (iface_set_eth_addr(iface, &next->mac) < 0) + if (set_attrs & (GR_IFACE_SET_VRF | GR_VXLAN_SET_ENCAP_VRF | GR_VXLAN_SET_MAC)) { + struct iface *vrf = get_vrf_iface(cur->encap_vrf_id); + struct rte_ether_addr mac = next->mac; + + assert(vrf != NULL); + + // Some devices assume a unique RMAC per VTEP. + // When no explicit MAC is given, inherit the VTEP VRF's MAC. + if (rte_is_zero_ether_addr(&mac)) + mac = iface_info_vrf(vrf)->mac; + + if (iface_set_eth_addr(iface, &mac) < 0) goto err; + conf_done |= GR_VXLAN_SET_MAC; + + // If configured for EVPN L3VNI, also synchronize the MAC on the interface VRF. + // So it will be advertised as RMAC by FRR. 
+ vrf = NULL; + if (set_attrs & GR_IFACE_SET_VRF) + vrf = get_vrf_iface(conf->vrf_id); + else if (iface->mode == GR_IFACE_MODE_VRF) + vrf = get_vrf_iface(iface->vrf_id); + if (vrf != NULL && iface_set_eth_addr(vrf, &mac) < 0) + goto err; } // Update the datapath template from the current config. From fd4cd0b217273dd5bb17836e3ad103a67298e42a Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:18:52 +0100 Subject: [PATCH 4/8] ip,ip6: save VTEP address across mbuf private data All GR_MBUF_PRIV_DATA_TYPE types overlay the same memory region. When ip_output resolves a nexthop on a VXLAN interface, it writes eth_output_mbuf_data->dst and ->ether_type which clobber iface_mbuf_data->vtep. By the time vxlan_output reads the vtep field, it contains garbage. Add a vtep field to eth_output_mbuf_data and set it from the nexthop gateway IP when the output interface is a VXLAN. In eth_output, save the vtep value before writing iface_mbuf_data (which shares the same memory) and restore it after. 
Signed-off-by: Robin Jarry --- modules/infra/datapath/eth.h | 3 +++ modules/infra/datapath/eth_output.c | 3 +++ modules/ip/datapath/ip_output.c | 6 ++++++ modules/ip6/datapath/ip6_output.c | 6 ++++++ 4 files changed, 18 insertions(+) diff --git a/modules/infra/datapath/eth.h b/modules/infra/datapath/eth.h index 73da053bf..b0c28996c 100644 --- a/modules/infra/datapath/eth.h +++ b/modules/infra/datapath/eth.h @@ -5,6 +5,8 @@ #include "mbuf.h" +#include + #include #include @@ -22,6 +24,7 @@ GR_MBUF_PRIV_DATA_TYPE(eth_input_mbuf_data, { eth_domain_t domain; }) GR_MBUF_PRIV_DATA_TYPE(eth_output_mbuf_data, { struct rte_ether_addr dst; rte_be16_t ether_type; + struct l3_addr vtep; }); void gr_eth_input_add_type(rte_be16_t eth_type, const char *node_name); diff --git a/modules/infra/datapath/eth_output.c b/modules/infra/datapath/eth_output.c index f85b0475c..7b3c41fc8 100644 --- a/modules/infra/datapath/eth_output.c +++ b/modules/infra/datapath/eth_output.c @@ -24,6 +24,7 @@ eth_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, struct rte_ether_hdr *eth; uint16_t last_iface_id; struct rte_mbuf *mbuf; + struct l3_addr vtep; rte_edge_t edge; last_iface_id = GR_IFACE_ID_UNDEF; @@ -59,7 +60,9 @@ eth_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, t->src_addr = src_mac; t->ether_type = priv->ether_type; } + vtep = priv->vtep; iface_mbuf_data(mbuf)->vlan_id = 0; + iface_mbuf_data(mbuf)->vtep = vtep; rte_node_enqueue_x1(graph, node, edge, mbuf); } diff --git a/modules/ip/datapath/ip_output.c b/modules/ip/datapath/ip_output.c index 61a0dc14a..78214edc7 100644 --- a/modules/ip/datapath/ip_output.c +++ b/modules/ip/datapath/ip_output.c @@ -134,6 +134,12 @@ ip_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, u eth_data = eth_output_mbuf_data(mbuf); eth_data->dst = l3->mac; eth_data->ether_type = RTE_BE16(RTE_ETHER_TYPE_IPV4); + if (iface->type == GR_IFACE_TYPE_VXLAN) { + eth_data->vtep.af = GR_AF_IP4; + 
eth_data->vtep.ipv4 = l3->ipv4; + } else { + eth_data->vtep.af = GR_AF_UNSPEC; + } sent++; next: if (gr_mbuf_is_traced(mbuf)) { diff --git a/modules/ip6/datapath/ip6_output.c b/modules/ip6/datapath/ip6_output.c index 9eb89aa6c..63137862c 100644 --- a/modules/ip6/datapath/ip6_output.c +++ b/modules/ip6/datapath/ip6_output.c @@ -116,6 +116,12 @@ ip6_output_process(struct rte_graph *graph, struct rte_node *node, void **objs, else eth_data->dst = l3->mac; eth_data->ether_type = RTE_BE16(RTE_ETHER_TYPE_IPV6); + if (iface->type == GR_IFACE_TYPE_VXLAN) { + eth_data->vtep.af = GR_AF_IP6; + eth_data->vtep.ipv6 = l3->ipv6; + } else { + eth_data->vtep.af = GR_AF_UNSPEC; + } sent++; next: if (gr_mbuf_is_traced(mbuf)) { From 2fe1ddc36ac6aa0d3bdf3ad110e7413d91d4dee1 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:18:23 +0100 Subject: [PATCH 5/8] nexthop: add GR_NH_F_REMOTE flag Add a flag to mark nexthops learned from remote VTEPs via EVPN. These nexthops carry a known IP+MAC pair from the control plane and are set to GR_NH_S_REACHABLE with GR_NH_F_STATIC so they bypass ARP/ND probing and aging. Signed-off-by: Robin Jarry --- modules/infra/api/gr_nexthop.h | 3 +++ modules/infra/cli/nexthop.c | 7 +++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/infra/api/gr_nexthop.h b/modules/infra/api/gr_nexthop.h index cc8c09e32..cbc4e7939 100644 --- a/modules/infra/api/gr_nexthop.h +++ b/modules/infra/api/gr_nexthop.h @@ -25,6 +25,7 @@ typedef enum : uint8_t { GR_NH_F_GATEWAY = GR_BIT8(2), // Gateway route. GR_NH_F_LINK = GR_BIT8(3), // Connected link route. GR_NH_F_MCAST = GR_BIT8(4), // Multicast address. + GR_NH_F_REMOTE = GR_BIT8(5), // Remote VTEP nexthop (EVPN). } gr_nh_flags_t; // Nexthop types for different forwarding behaviors. 
@@ -176,6 +177,8 @@ static inline const char *gr_nh_flag_name(const gr_nh_flags_t flag) { return "link"; case GR_NH_F_MCAST: return "multicast"; + case GR_NH_F_REMOTE: + return "remote"; } return "?"; } diff --git a/modules/infra/cli/nexthop.c b/modules/infra/cli/nexthop.c index 484fc8fcc..162595bb4 100644 --- a/modules/infra/cli/nexthop.c +++ b/modules/infra/cli/nexthop.c @@ -379,6 +379,8 @@ static cmd_status_t nh_l3_add(struct gr_api_client *c, const struct ec_pnode *p) goto out; if (arg_eth_addr(p, "MAC", &l3->mac) < 0 && errno != ENOENT) goto out; + if (arg_str(p, "remote")) + l3->flags |= GR_NH_F_REMOTE; if (gr_api_client_send_recv(c, GR_NH_ADD, len, req, NULL) < 0) goto out; @@ -619,13 +621,14 @@ static int ctx_init(struct ec_node *root) { ret = CLI_COMMAND( NEXTHOP_ADD_CTX(root), - "l3 iface IFACE [(id ID),(address IP),(mac MAC)]", + "l3 iface IFACE [(id ID),(address IP),(mac MAC),(remote)]", nh_l3_add, "Add a new L3 nexthop.", with_help("IPv4/6 address.", ec_node_re("IP", IP_ANY_RE)), with_help("Ethernet address.", ec_node_re("MAC", ETH_ADDR_RE)), with_help("Nexthop ID.", ec_node_uint("ID", 1, UINT32_MAX - 1, 10)), - with_help("Output interface.", ec_node_dyn("IFACE", complete_iface_names, NULL)) + with_help("Output interface.", ec_node_dyn("IFACE", complete_iface_names, NULL)), + with_help("Mark as remote (EVPN).", ec_node_str("remote", "remote")) ); if (ret < 0) return ret; From 7134073fdb5c5ff0010698544b84672725477133 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:16:11 +0100 Subject: [PATCH 6/8] frr: add L3VNI dplane-thread state EVPN type-5 routes require two pieces of state that FRR delivers out of order: the remote router MAC arrives via DPLANE_OP_NEIGH_INSTALL before the nexthop via DPLANE_OP_NH_INSTALL. Both need to be combined when grout_add_nexthop builds the GR_NH_ADD request. 
Add l3vni_map to maintain two hash tables on the dplane thread: vrf_id -> vxlan_iface_id: used to redirect nexthops from the VRF interface (FRR's SVI model) to the VXLAN interface so that ip_output routes packets into the tunnel. (vrf_id, vtep) -> RMAC: caches the remote router MAC until the matching nexthop install arrives. Both tables run exclusively on the dplane thread so no locking is needed. Signed-off-by: Robin Jarry --- frr/l3vni_map.c | 119 ++++++++++++++++++++++++++++++++++++++++++++++++ frr/l3vni_map.h | 45 ++++++++++++++++++ frr/meson.build | 1 + 3 files changed, 165 insertions(+) create mode 100644 frr/l3vni_map.c create mode 100644 frr/l3vni_map.h diff --git a/frr/l3vni_map.c b/frr/l3vni_map.c new file mode 100644 index 000000000..bf427e34a --- /dev/null +++ b/frr/l3vni_map.c @@ -0,0 +1,119 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2026 Robin Jarry + +#include "if_map.h" +#include "l3vni_map.h" + +#include + +#include +#include + +// All functions in this file run exclusively on the dplane thread +// (grout_link_change, grout_add_nexthop, grout_neigh_update_ctx). +// No locking required. 
+ +// VRF -> VXLAN iface mapping /////////////////////////////////////////////////// + +PREDECL_HASH(l3vni_hash); + +struct l3vni_entry { + struct l3vni_hash_item item; + uint16_t vrf_id; + uint16_t vxlan_iface_id; +}; + +static int l3vni_cmp(const struct l3vni_entry *a, const struct l3vni_entry *b) { + return numcmp(a->vrf_id, b->vrf_id); +} + +static uint32_t l3vni_hashfn(const struct l3vni_entry *e) { + return e->vrf_id; +} + +DECLARE_HASH(l3vni_hash, struct l3vni_entry, item, l3vni_cmp, l3vni_hashfn); +static struct l3vni_hash_head l3vni_entries = INIT_HASH(l3vni_entries); + +void l3vni_set(uint16_t vrf_id, uint16_t vxlan_iface_id) { + struct l3vni_entry *e, key = {.vrf_id = vrf_id}; + + e = l3vni_hash_find(&l3vni_entries, &key); + if (e != NULL) { + e->vxlan_iface_id = vxlan_iface_id; + return; + } + e = XCALLOC(MTYPE_GROUT_MEM, sizeof(*e)); + e->vrf_id = vrf_id; + e->vxlan_iface_id = vxlan_iface_id; + l3vni_hash_add(&l3vni_entries, e); +} + +void l3vni_del(uint16_t vrf_id) { + struct l3vni_entry key = {.vrf_id = vrf_id}; + struct l3vni_entry *e = l3vni_hash_find(&l3vni_entries, &key); + + if (e != NULL) { + l3vni_hash_del(&l3vni_entries, e); + XFREE(MTYPE_GROUT_MEM, e); + } +} + +uint16_t l3vni_get_vxlan(uint16_t vrf_id) { + struct l3vni_entry key = {.vrf_id = vrf_id}; + struct l3vni_entry *e = l3vni_hash_find(&l3vni_entries, &key); + return e ? e->vxlan_iface_id : GR_IFACE_ID_UNDEF; +} + +// (VRF, VTEP) -> RMAC cache /////////////////////////////////////////////////// + +PREDECL_HASH(rmac_hash); + +struct rmac_entry { + struct rmac_hash_item item; + uint16_t vrf_id; + ip4_addr_t vtep; + struct ethaddr mac; +}; + +static int rmac_cmp(const struct rmac_entry *a, const struct rmac_entry *b) { + int r = numcmp(a->vrf_id, b->vrf_id); + return r ? 
r : numcmp(a->vtep, b->vtep); +} + +static uint32_t rmac_hashfn(const struct rmac_entry *e) { + return jhash_2words(e->vrf_id, e->vtep, 0); +} + +DECLARE_HASH(rmac_hash, struct rmac_entry, item, rmac_cmp, rmac_hashfn); +static struct rmac_hash_head rmac_entries = INIT_HASH(rmac_entries); + +void l3vni_rmac_set(uint16_t vrf_id, ip4_addr_t vtep, const struct ethaddr *mac) { + struct rmac_entry *e, key = {.vrf_id = vrf_id, .vtep = vtep}; + + e = rmac_hash_find(&rmac_entries, &key); + if (e != NULL) { + e->mac = *mac; + return; + } + e = XCALLOC(MTYPE_GROUT_MEM, sizeof(*e)); + e->vrf_id = vrf_id; + e->vtep = vtep; + e->mac = *mac; + rmac_hash_add(&rmac_entries, e); +} + +void l3vni_rmac_del(uint16_t vrf_id, ip4_addr_t vtep) { + struct rmac_entry key = {.vrf_id = vrf_id, .vtep = vtep}; + struct rmac_entry *e = rmac_hash_find(&rmac_entries, &key); + + if (e != NULL) { + rmac_hash_del(&rmac_entries, e); + XFREE(MTYPE_GROUT_MEM, e); + } +} + +const struct ethaddr *l3vni_rmac_get(uint16_t vrf_id, ip4_addr_t vtep) { + struct rmac_entry key = {.vrf_id = vrf_id, .vtep = vtep}; + struct rmac_entry *e = rmac_hash_find(&rmac_entries, &key); + return e ? &e->mac : NULL; +} diff --git a/frr/l3vni_map.h b/frr/l3vni_map.h new file mode 100644 index 000000000..a005dfbe5 --- /dev/null +++ b/frr/l3vni_map.h @@ -0,0 +1,45 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +// Copyright (c) 2026 Robin Jarry + +// L3VNI dplane-thread state for EVPN symmetric IRB (Integrated Routing and +// Bridging). +// +// FRR's EVPN type-5 (IP prefix) routes use a per-VRF L3 VNI with a VXLAN +// interface. Two mappings are maintained on the dplane thread (no locking): +// +// VRF -> VXLAN iface +// +// grout_add_nexthop() redirects nexthops from the VRF (FRR's SVI model) to +// the VXLAN interface so that ip_output routes packets into the tunnel. +// +// (VRF, VTEP) -> RMAC +// +// DPLANE_OP_NEIGH_INSTALL delivers the remote router MAC before +// DPLANE_OP_NH_INSTALL creates the nexthop. 
The RMAC is cached here and +// applied by grout_add_nexthop() when the nexthop arrives. + +#pragma once + +#include "lib/prefix.h" + +#include + +#include + +// Register vrf_id -> vxlan_iface_id mapping. +void l3vni_set(uint16_t vrf_id, uint16_t vxlan_iface_id); + +// Remove mapping for vrf_id. +void l3vni_del(uint16_t vrf_id); + +// Return vxlan iface id for vrf_id, or GR_IFACE_ID_UNDEF. +uint16_t l3vni_get_vxlan(uint16_t vrf_id); + +// Cache remote VTEP router MAC for (vrf_id, vtep). +void l3vni_rmac_set(uint16_t vrf_id, ip4_addr_t vtep, const struct ethaddr *mac); + +// Remove cached RMAC for (vrf_id, vtep). +void l3vni_rmac_del(uint16_t vrf_id, ip4_addr_t vtep); + +// Look up cached RMAC for (vrf_id, vtep), or NULL. +const struct ethaddr *l3vni_rmac_get(uint16_t vrf_id, ip4_addr_t vtep); diff --git a/frr/meson.build b/frr/meson.build index 1e95e04b1..42ca34f30 100644 --- a/frr/meson.build +++ b/frr/meson.build @@ -32,6 +32,7 @@ frr_plugin = shared_module( files( 'if_grout.c', 'if_map.c', + 'l3vni_map.c', 'rt_grout.c', 'zebra_dplane_grout.c', ) + grout_header, From 0a0ce09994aa0e798991c69daddf303afa461df7 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 19:19:19 +0100 Subject: [PATCH 7/8] frr: add L3VPN support Handle EVPN type-5 (IP prefix) routes with symmetric IRB over VXLAN. Present VRF-mode VXLAN interfaces to FRR as bridge slaves of the VRF interface. FRR requires an SVI derived from a bridge master to bring the L3VNI up and compute the Router MAC for type-5 routes. The VRF MAC (set in a previous commit) serves as the Router MAC. Handle DPLANE_OP_NEIGH_INSTALL/DELETE to cache remote router MACs delivered by FRR before the corresponding nexthop install arrives. When grout_add_nexthop processes an L3 nexthop in a VRF with an L3VNI, it redirects the interface from VRF to VXLAN and applies the cached RMAC. 
Signed-off-by: Robin Jarry --- frr/if_grout.c | 13 +++++++++++++ frr/rt_grout.c | 42 ++++++++++++++++++++++++++++++++++++++++ frr/rt_grout.h | 1 + frr/zebra_dplane_grout.c | 5 +++++ 4 files changed, 61 insertions(+) diff --git a/frr/if_grout.c b/frr/if_grout.c index 9b86d65b0..d48e975ba 100644 --- a/frr/if_grout.c +++ b/frr/if_grout.c @@ -3,6 +3,7 @@ #include "if_grout.h" #include "if_map.h" +#include "l3vni_map.h" #include "log_grout.h" #include "zebra_dplane_grout.h" @@ -154,6 +155,16 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { dplane_ctx_set_ifp_table_id( ctx, vrf_grout_to_frr(gr_if->base.vrf_id) ); + + // For VXLAN in VRF mode, present it as a bridge slave + // of the VRF interface. FRR requires an SVI (derived + // from the bridge master) to bring the L3VNI up and + // compute the Router MAC for EVPN type-5 routes. + if (zif_type == ZEBRA_IF_VXLAN) { + bridge_ifindex = ifindex_grout_to_frr(gr_if->base.vrf_id); + slave_type = ZEBRA_IF_SLAVE_BRIDGE; + l3vni_set(gr_if->base.vrf_id, gr_if->id); + } break; case GR_IFACE_MODE_BOND: bond_ifindex = ifindex_grout_to_frr(gr_if->domain_id); @@ -204,6 +215,8 @@ void grout_link_change(struct gr_iface *gr_if, bool new, bool startup) { } else { dplane_ctx_set_op(ctx, DPLANE_OP_INTF_DELETE); dplane_ctx_set_status(ctx, ZEBRA_DPLANE_REQUEST_QUEUED); + if (gr_vxlan != NULL && gr_if->mode == GR_IFACE_MODE_VRF) + l3vni_del(gr_if->base.vrf_id); remove_mapping_by_grout_ifindex(gr_if->id); } diff --git a/frr/rt_grout.c b/frr/rt_grout.c index 431667cf9..fbe149d5e 100644 --- a/frr/rt_grout.c +++ b/frr/rt_grout.c @@ -2,6 +2,7 @@ // Copyright (c) 2025 Maxime Leroy, Free Mobile #include "if_map.h" +#include "l3vni_map.h" #include "log_grout.h" #include "rt_grout.h" @@ -658,7 +659,9 @@ grout_add_nexthop(uint32_t nh_id, gr_nh_origin_t origin, const struct nexthop *n struct gr_nexthop_info_srv6 *sr6; struct gr_nh_add_req *req = NULL; struct gr_nexthop_info_l3 *l3; + const struct ethaddr *rmac; size_t len = 
sizeof(*req); + uint16_t vxlan_iface_id; gr_nh_type_t type; switch (nh->type) { @@ -706,12 +709,25 @@ grout_add_nexthop(uint32_t nh_id, gr_nh_origin_t origin, const struct nexthop *n switch (type) { case GR_NH_T_L3: + // For L3 nexthops in VRFs with an L3VNI, redirect the iface from + // the VRF (SVI in FRR's model) to the VXLAN interface. Grout + // routes packets directly through the VXLAN tunnel. + vxlan_iface_id = l3vni_get_vxlan(req->nh.vrf_id); + if (vxlan_iface_id != GR_IFACE_ID_UNDEF) + req->nh.iface_id = vxlan_iface_id; + switch (nh->type) { case NEXTHOP_TYPE_IPV4: case NEXTHOP_TYPE_IPV4_IFINDEX: l3 = (struct gr_nexthop_info_l3 *)req->nh.info; l3->af = GR_AF_IP4; memcpy(&l3->ipv4, &nh->gate.ipv4, sizeof(l3->ipv4)); + // Apply cached RMAC from EVPN NEIGH install if available. + rmac = l3vni_rmac_get(req->nh.vrf_id, l3->ipv4); + if (rmac != NULL) { + memcpy(&l3->mac, rmac, sizeof(l3->mac)); + l3->flags |= GR_NH_F_REMOTE; + } break; case NEXTHOP_TYPE_IPV6: case NEXTHOP_TYPE_IPV6_IFINDEX: @@ -1012,6 +1028,32 @@ enum zebra_dplane_result grout_macfdb_update_ctx(struct zebra_dplane_ctx *ctx) { return ret == 0 ? ZEBRA_DPLANE_REQUEST_SUCCESS : ZEBRA_DPLANE_REQUEST_FAILURE; } +enum zebra_dplane_result grout_neigh_update_ctx(struct zebra_dplane_ctx *ctx) { + const struct ipaddr *addr = dplane_ctx_neigh_get_ipaddr(ctx); + bool add = dplane_ctx_get_op(ctx) != DPLANE_OP_NEIGH_DELETE; + uint16_t vrf_id = vrf_frr_to_grout(dplane_ctx_get_vrf(ctx)); + + if (addr->ipa_type != IPADDR_V4) { + gr_log_debug("only IPv4 VTEP addresses supported, skip"); + return ZEBRA_DPLANE_REQUEST_SUCCESS; + } + + // Cache the RMAC for later use by grout_add_nexthop. We cannot + // create a separate nexthop here because grout's L3 nexthop hash + // keys on (vrf, addr) without iface_id, so it would collide with + // the route nexthop that FRR installs right after. 
+ if (add) { + const struct ethaddr *mac = dplane_ctx_neigh_get_mac(ctx); + gr_log_debug("cache rmac vrf=%u %pIA %pEA", vrf_id, addr, mac); + l3vni_rmac_set(vrf_id, addr->ipaddr_v4.s_addr, mac); + } else { + gr_log_debug("uncache rmac vrf=%u %pIA", vrf_id, addr); + l3vni_rmac_del(vrf_id, addr->ipaddr_v4.s_addr); + } + + return ZEBRA_DPLANE_REQUEST_SUCCESS; +} + enum zebra_dplane_result grout_vxlan_flood_update_ctx(struct zebra_dplane_ctx *ctx) { const struct ipaddr *addr = dplane_ctx_neigh_get_ipaddr(ctx); bool add = dplane_ctx_get_op(ctx) == DPLANE_OP_VTEP_ADD; diff --git a/frr/rt_grout.h b/frr/rt_grout.h index 4f9a84652..57ca525d3 100644 --- a/frr/rt_grout.h +++ b/frr/rt_grout.h @@ -18,4 +18,5 @@ void grout_nexthop_change(bool new, struct gr_nexthop *gr_nh, bool startup); void grout_macfdb_change(const struct gr_fdb_entry *fdb, bool new); enum zebra_dplane_result grout_macfdb_update_ctx(struct zebra_dplane_ctx *ctx); +enum zebra_dplane_result grout_neigh_update_ctx(struct zebra_dplane_ctx *ctx); enum zebra_dplane_result grout_vxlan_flood_update_ctx(struct zebra_dplane_ctx *ctx); diff --git a/frr/zebra_dplane_grout.c b/frr/zebra_dplane_grout.c index 65b2a7464..c4e71179d 100644 --- a/frr/zebra_dplane_grout.c +++ b/frr/zebra_dplane_grout.c @@ -949,6 +949,11 @@ static enum zebra_dplane_result zd_grout_process_update(struct zebra_dplane_ctx case DPLANE_OP_MAC_DELETE: return grout_macfdb_update_ctx(ctx); + case DPLANE_OP_NEIGH_INSTALL: + case DPLANE_OP_NEIGH_UPDATE: + case DPLANE_OP_NEIGH_DELETE: + return grout_neigh_update_ctx(ctx); + case DPLANE_OP_VTEP_ADD: case DPLANE_OP_VTEP_DELETE: return grout_vxlan_flood_update_ctx(ctx); From 7765387c386d7a415b10584ad40bd9ecb85aa770 Mon Sep 17 00:00:00 2001 From: Robin Jarry Date: Sun, 22 Mar 2026 11:19:21 +0100 Subject: [PATCH 8/8] smoke: add EVPN L3VPN test Verify EVPN type-5 IP prefix route exchange and L3 connectivity over VXLAN between FRR+grout and a standalone FRR+Linux peer. 
Each side has a VRF with an L3 VNI (1000) and a host on a local subnet. BGP EVPN advertises connected prefixes as type-5 routes. Grout uses a VXLAN in VRF mode (no bridge needed). The peer uses the standard Linux bridge+SVI model. The test verifies L3VNI recognition, type-5 route exchange, route installation in the VRF, RMAC presence on the route nexthop, and end-to-end ping through the overlay. Signed-off-by: Robin Jarry --- smoke/evpn_l3vpn_frr_test.sh | 242 +++++++++++++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100755 smoke/evpn_l3vpn_frr_test.sh diff --git a/smoke/evpn_l3vpn_frr_test.sh b/smoke/evpn_l3vpn_frr_test.sh new file mode 100755 index 000000000..e8278c227 --- /dev/null +++ b/smoke/evpn_l3vpn_frr_test.sh @@ -0,0 +1,242 @@ +#!/bin/bash +# SPDX-License-Identifier: BSD-3-Clause +# Copyright (c) 2026 Robin Jarry + +# This test verifies EVPN Type-5 (IP prefix) L3VPN connectivity using symmetric +# IRB (Integrated Routing and Bridging) over VXLAN between FRR+Grout and +# a standalone FRR+Linux peer. +# +# Each side has a VRF with an L3 VNI (1000) and a host connected to a local +# port. BGP EVPN advertises IP prefixes (type-5 routes) and RMAC entries +# (type-2 routes with GR_NH_F_REMOTE nexthops) across the VXLAN overlay. +# +# Success criteria: +# - Both sides exchange EVPN type-5 routes (IP prefixes installed). +# - Host-A and Host-B can ping each other through the L3 VXLAN overlay. +# - RMACs are installed as remote nexthops on the grout side. +# +# .-------------------------------. .-----------------------------. +# | evpn-peer | | grout | +# | | | | +# | .- - - - - - - . | | .- - - - - - - . | +# | ' vrf tenant ' | | ' vrf tenant ' | +# | ' ' | | ' ' | +# | ' +-------+ ' | | ' ' | +# | ' | br-l3 | ' | | ' ' | +# | ' +---+---+ ' | | ' ' | +# | ' | ' | | ' ' | +# | ' +----+-----+ ' | | ' +----------+ ' | +# | ' | vxlan-l3 |........... | | ..........| vxlan-l3 | ' | +# | ' +----------+ ' . | | . ' +----------+ ' | +# | ' ' . 
| | . ' ' | +# | ' .1 ' . | | . ' .1 ' | +# | ' +------+ ' .1 | | .2 ' +-------+ ' | +# | ' | p1 | ' +--------+ | | +------+ ' | p1 | ' | +# | ' +--+---+ ' | x-p0 | | | | p0 | ' +---+---+ ' | +# | '- - - |- - - -' +---+----+ | | +--+---+ '- - - |- - - -' | +# '--------|---------------|------' '----|--------------|---------' +# | | | | +# | | <------- BGP ----> | | +# 16.0.0.0/24 '---------------------' 48.0.0.0/24 +# | underlay | +# .-------|-----------. 172.16.0.0/24 .----------|--------. +# | +---+----+ | | +---+----+ | +# | | x-p1 | | | | x-p1 | | +# | +--------+ | <= = = = = = = = = = = = => | +--------+ | +# | .2 | overlay L3VPN | .2 | +# | | | | +# | host-a | | host-b | +# '-------------------' '-------------------' + +. $(dirname $0)/_init_frr.sh + +# right side (grout) ----------------------------------------------------------- +create_interface p0 +set_ip_address p0 172.16.0.2/24 + +# left side (Linux peer) ------------------------------------------------------- +start_frr evpn-peer + +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.forwarding=1 +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.all.rp_filter=0 +ip netns exec evpn-peer sysctl -qw net.ipv4.conf.default.rp_filter=0 + +move_to_netns x-p0 evpn-peer +ip -n evpn-peer addr add 172.16.0.1/24 dev x-p0 + +# Create L3VNI (1000) VXLAN on the Linux peer with a bridge+SVI (required by Linux) +ip -n evpn-peer link add br-l3 type bridge +ip -n evpn-peer link set br-l3 up + +ip -n evpn-peer link add vxlan-l3 type vxlan id 1000 local 172.16.0.1 dstport 4789 nolearning +ip -n evpn-peer link set vxlan-l3 master br-l3 +ip -n evpn-peer link set vxlan-l3 up + +# Create VRF "tenant" on the peer and bind the L3VNI bridge as SVI +ip -n evpn-peer link add tenant type vrf table 10 +ip -n evpn-peer link set tenant up +ip -n evpn-peer link set br-l3 master tenant + +# Host-facing veth pair in the peer VRF (the x-p1 end is moved to netns host-a below) +ip -n evpn-peer link add p1 type veth peer name x-p1 +ip -n evpn-peer link set p1 master tenant +ip -n evpn-peer link set
p1 up +ip -n evpn-peer addr add 16.0.0.1/24 dev p1 + +netns_add host-a +ip -n evpn-peer link set x-p1 netns host-a +ip -n host-a link set x-p1 up +ip -n host-a addr add 16.0.0.2/24 dev x-p1 +ip -n host-a route add default via 16.0.0.1 + +# FRR config on the Linux peer (iBGP AS 65000, l2vpn evpn activated towards grout at 172.16.0.2) +vtysh -N evpn-peer <<-EOF +configure terminal + +vrf tenant + vni 1000 +exit-vrf + +router bgp 65000 + bgp router-id 172.16.0.1 + no bgp default ipv4-unicast + + neighbor 172.16.0.2 remote-as 65000 + + address-family l2vpn evpn + neighbor 172.16.0.2 activate + advertise-all-vni + exit-address-family +exit + +router bgp 65000 vrf tenant + bgp router-id 172.16.0.1 + + address-family ipv4 unicast + redistribute connected + exit-address-family + + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +exit +EOF + +# right side (grout) setup L3VPN ----------------------------------------------- +create_vrf tenant + +# L3 VNI VXLAN in VRF mode (no bridge needed in grout) +grcli interface add vxlan vxlan-l3 vni 1000 local 172.16.0.2 vrf tenant + +create_interface p1 vrf tenant +set_ip_address p1 48.0.0.1/24 + +netns_add host-b +move_to_netns x-p1 host-b +ip -n host-b addr add 48.0.0.2/24 dev x-p1 +ip -n host-b route add default via 48.0.0.1 + +mark_events # NOTE(review): presumably marks where the later wait_event calls start matching; confirm in the sourced helpers + +# FRR config on grout (mirror of the peer config with router-id and neighbor addresses swapped) +vtysh <<-EOF +configure terminal + +vrf tenant + vni 1000 +exit-vrf + +router bgp 65000 + bgp router-id 172.16.0.2 + no bgp default ipv4-unicast + + neighbor 172.16.0.1 remote-as 65000 + + address-family l2vpn evpn + neighbor 172.16.0.1 activate + advertise-all-vni + exit-address-family +exit + +router bgp 65000 vrf tenant + bgp router-id 172.16.0.2 + + address-family ipv4 unicast + redistribute connected + exit-address-family + + address-family l2vpn evpn + advertise ipv4 unicast + exit-address-family +exit +EOF + +# -- Check L3VNI is recognized by both sides ----------------------------------- +attempts=0 +while !
vtysh -c "show evpn vni 1000" | grep -qF "L3"; do + if [ "$attempts" -ge 5 ]; then + vtysh -c "show evpn vni" + fail "Grout FRR does not recognize VNI 1000 as L3VNI" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +attempts=0 +while ! vtysh -N evpn-peer -c "show evpn vni 1000" | grep -qF "L3"; do + if [ "$attempts" -ge 5 ]; then + vtysh -N evpn-peer -c "show evpn vni" + fail "Linux peer does not recognize VNI 1000 as L3VNI" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Wait for EVPN type-5 route exchange --------------------------------------- +attempts=0 +while ! vtysh -c "show bgp l2vpn evpn route type 5" | grep -qF "16.0.0.0"; do # 16.0.0.0/24 is the subnet behind the peer's p1 + if [ "$attempts" -ge 5 ]; then + vtysh -c "show bgp l2vpn evpn route type 5" + fail "Grout FRR did not learn type-5 route for 16.0.0.0/24" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +attempts=0 +while ! vtysh -N evpn-peer -c "show bgp l2vpn evpn route type 5" | grep -qF "48.0.0.0"; do # 48.0.0.0/24 is the subnet behind grout's p1 + if [ "$attempts" -ge 5 ]; then + vtysh -c "show bgp vrf tenant ipv4 unicast" # this and the next command query the grout-side FRR (no -N evpn-peer) + vtysh -c "show bgp l2vpn evpn route" + vtysh -N evpn-peer -c "show bgp l2vpn evpn route type 5" + fail "Linux peer did not learn type-5 route for 48.0.0.0/24" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Wait for routes to be installed in VRF on both sides ---------------------- +wait_event 'route4 add: vrf=tenant 16.0.0.0/24' + +attempts=0 +while !
ip -n evpn-peer route show vrf tenant | grep -qF "48.0.0.0/24"; do + if [ "$attempts" -ge 5 ]; then + ip -n evpn-peer route show vrf tenant + fail "Route 48.0.0.0/24 not installed in peer VRF tenant" + fi + sleep 1 + attempts=$((attempts + 1)) +done + +# -- Check the peer RMAC is set on the remote nexthop -------------------------- +rmac=$(ip netns exec evpn-peer cat /sys/class/net/vxlan-l3/address) # MAC address of the peer's vxlan-l3 device + +wait_event "nh new: type=L3 id=[0-9]+ iface=vxlan-l3 vrf=tenant origin=zebra family=ipv4 addr=172.16.0.1 mac=$rmac flags=static remote" + +vtysh -c "show bgp l2vpn evpn route type 5" # the output of this and the next two commands is printed, not inspected +grcli route show vrf tenant +grcli nexthop show vrf tenant + +# -- Verify L3 connectivity through VXLAN overlay ------------------------------ +ip netns exec host-b ping -i0.1 -c3 -W1 16.0.0.2 # host-b -> host-a +ip netns exec host-a ping -i0.1 -c3 -W1 48.0.0.2 # host-a -> host-b