-
Notifications
You must be signed in to change notification settings - Fork 295
CORENET-7125: iptables to nftables #3038
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -526,15 +526,21 @@ data: | |
| cni-bin-copy | ||
|
|
||
| echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port" | ||
| iptables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK | ||
| iptables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK | ||
| ip6tables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK | ||
| ip6tables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK | ||
| iptables -t raw -D PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true | ||
| iptables -t raw -D OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true | ||
| ip6tables -t raw -D PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true | ||
| ip6tables -t raw -D OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true | ||
| nft add table inet ovn_notrack | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. add a comment explaining that this is to clean up old iptables rules and can eventually go away |
||
| nft flush table inet ovn_notrack | ||
| nft 'add chain inet ovn_notrack prerouting { type filter hook prerouting priority raw; policy accept; }' | ||
| nft 'add chain inet ovn_notrack output { type filter hook output priority raw; policy accept; }' | ||
| nft add rule inet ovn_notrack prerouting udp dport {{.GenevePort}} notrack | ||
| nft add rule inet ovn_notrack output udp dport {{.GenevePort}} notrack | ||
|
|
||
| {{- if .OVNHybridOverlayVXLANPort}} | ||
| echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on hybrid overlay VXLAN port" | ||
| iptables -t raw -A PREROUTING -p udp --dport {{.OVNHybridOverlayVXLANPort}} -j NOTRACK | ||
| iptables -t raw -A OUTPUT -p udp --dport {{.OVNHybridOverlayVXLANPort}} -j NOTRACK | ||
| nft add rule inet ovn_notrack prerouting udp dport {{.OVNHybridOverlayVXLANPort}} notrack | ||
| nft add rule inet ovn_notrack output udp dport {{.OVNHybridOverlayVXLANPort}} notrack | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| {{- end}} | ||
|
|
||
| echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -486,7 +486,7 @@ spec: | |
| - mountPath: /etc/systemd/system | ||
| name: systemd-units | ||
| readOnly: true | ||
| # for the iptables wrapper | ||
| # for the nftables wrapper | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. This was talking about the old iptables wrapper scripts to run the iptables binaries off the host filesystem, but we haven't used those since we stopped supporting RHEL 7... you should be able to just remove this mountpoint. (It looks like nothing else uses it.) |
||
| - mountPath: /host | ||
| name: host-slash | ||
| readOnly: true | ||
|
|
@@ -597,41 +597,35 @@ spec: | |
| export KUBECONFIG=/var/run/ovnkube-kubeconfig | ||
| {{ end }} | ||
|
|
||
| touch /var/run/ovn/add_iptables.sh | ||
| chmod 0755 /var/run/ovn/add_iptables.sh | ||
| cat <<'EOF' > /var/run/ovn/add_iptables.sh | ||
| touch /var/run/ovn/add_nft_icmp.sh | ||
| chmod 0755 /var/run/ovn/add_nft_icmp.sh | ||
| cat <<'EOF' > /var/run/ovn/add_nft_icmp.sh | ||
| #!/bin/sh | ||
| if [ -z "$3" ] | ||
| then | ||
| echo "Called with host address missing, ignore" | ||
| exit 0 | ||
| fi | ||
| echo "Adding ICMP drop rule for '$3' " | ||
| if iptables -C CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION | ||
| then | ||
| echo "iptables already set for $3" | ||
| else | ||
| iptables -A CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION | ||
| fi | ||
| echo "Adding ICMP drop rule for '$3'" | ||
| nft add element inet azure_icmp icmp_sources "{ $3 }" | ||
| EOF | ||
|
|
||
| echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}" | ||
| iptables -X CHECK_ICMP_SOURCE || true | ||
| iptables -N CHECK_ICMP_SOURCE || true | ||
| iptables -F CHECK_ICMP_SOURCE | ||
| iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE || true | ||
| iptables -I INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE | ||
| iptables -N ICMP_ACTION || true | ||
| iptables -F ICMP_ACTION | ||
| iptables -A ICMP_ACTION -j LOG | ||
| iptables -A ICMP_ACTION -j DROP | ||
| iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE 2>/dev/null || true | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. as above, add a comment explaining that this is cleanup and can eventually go away |
||
| iptables -F CHECK_ICMP_SOURCE 2>/dev/null || true | ||
| iptables -X CHECK_ICMP_SOURCE 2>/dev/null || true | ||
| iptables -F ICMP_ACTION 2>/dev/null || true | ||
| iptables -X ICMP_ACTION 2>/dev/null || true | ||
| nft add table inet azure_icmp | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. could be |
||
| nft flush table inet azure_icmp | ||
| nft 'add set inet azure_icmp icmp_sources { type ipv4_addr; }' | ||
| nft 'add chain inet azure_icmp input { type filter hook input priority 0; policy accept; }' | ||
| nft add rule inet azure_icmp input icmp type destination-unreachable icmp code frag-needed ip saddr @icmp_sources counter log drop | ||
|
Comment on lines
+600
to
+623
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🩺 Stability & Availability | 🟠 Major | ⚡ Quick win Make the Azure nftables programming restart-safe.
Proposed fix:
  echo "Adding ICMP drop rule for '$3'"
- nft add element inet azure_icmp icmp_sources "{ $3 }"
+ nft add element inet azure_icmp icmp_sources "{ $3 }" 2>/dev/null || true
  EOF
  echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}"
+ nft delete table inet azure_icmp 2>/dev/null || true
  nft add table inet azure_icmp
Also applies to: 623-623 🤖 Prompt for AI Agents
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nft add element is also idempotent for duplicate elements
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
The remaining issue is just the restart path for: nft add table inet azure_icmpSince |
||
| # | ||
| ip addr show | ||
| ip route show | ||
| iptables -nvL | ||
| iptables -nvL -t nat | ||
| oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh | ||
| #systemd-run -qPG -- oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh | ||
| nft list table inet azure_icmp | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_nft_icmp.sh | ||
| lifecycle: | ||
| preStop: | ||
| exec: | ||
|
|
@@ -644,7 +638,6 @@ spec: | |
| - mountPath: /etc/ovn/ | ||
| name: etc-openvswitch | ||
| {{ end }} | ||
| # for the iptables wrapper | ||
| - mountPath: /host | ||
| name: host-slash | ||
| readOnly: true | ||
|
|
@@ -673,7 +666,7 @@ spec: | |
| - name: systemd-units | ||
| hostPath: | ||
| path: /etc/systemd/system | ||
| # used for iptables wrapper scripts | ||
| # used for nftables wrapper scripts | ||
| - name: host-slash | ||
| hostPath: | ||
| path: / | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -510,7 +510,7 @@ spec: | |
| - mountPath: /etc/systemd/system | ||
| name: systemd-units | ||
| readOnly: true | ||
| # for the iptables wrapper | ||
| # for the nftables wrapper | ||
| - mountPath: /host | ||
| name: host-slash | ||
| readOnly: true | ||
|
|
@@ -603,41 +603,35 @@ spec: | |
| export KUBECONFIG=/etc/ovn/kubeconfig | ||
| {{ end }} | ||
|
|
||
| touch /var/run/ovn/add_iptables.sh | ||
| chmod 0755 /var/run/ovn/add_iptables.sh | ||
| cat <<'EOF' > /var/run/ovn/add_iptables.sh | ||
| touch /var/run/ovn/add_nft_icmp.sh | ||
| chmod 0755 /var/run/ovn/add_nft_icmp.sh | ||
| cat <<'EOF' > /var/run/ovn/add_nft_icmp.sh | ||
| #!/bin/sh | ||
| if [ -z "$3" ] | ||
| then | ||
| echo "Called with host address missing, ignore" | ||
| exit 0 | ||
| fi | ||
| echo "Adding ICMP drop rule for '$3' " | ||
| if iptables -C CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION | ||
| then | ||
| echo "iptables already set for $3" | ||
| else | ||
| iptables -A CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION | ||
| fi | ||
| echo "Adding ICMP drop rule for '$3'" | ||
| nft add element inet azure_icmp icmp_sources "{ $3 }" | ||
| EOF | ||
|
|
||
| echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}" | ||
| iptables -X CHECK_ICMP_SOURCE || true | ||
| iptables -N CHECK_ICMP_SOURCE || true | ||
| iptables -F CHECK_ICMP_SOURCE | ||
| iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE || true | ||
| iptables -I INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE | ||
| iptables -N ICMP_ACTION || true | ||
| iptables -F ICMP_ACTION | ||
| iptables -A ICMP_ACTION -j LOG | ||
| iptables -A ICMP_ACTION -j DROP | ||
| iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE 2>/dev/null || true | ||
| iptables -F CHECK_ICMP_SOURCE 2>/dev/null || true | ||
| iptables -X CHECK_ICMP_SOURCE 2>/dev/null || true | ||
| iptables -F ICMP_ACTION 2>/dev/null || true | ||
| iptables -X ICMP_ACTION 2>/dev/null || true | ||
| nft add table inet azure_icmp | ||
| nft flush table inet azure_icmp | ||
| nft 'add set inet azure_icmp icmp_sources { type ipv4_addr; }' | ||
| nft 'add chain inet azure_icmp input { type filter hook input priority 0; policy accept; }' | ||
| nft add rule inet azure_icmp input icmp type destination-unreachable icmp code frag-needed ip saddr @icmp_sources counter log drop | ||
|
Comment on lines
+606
to
+629
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🩺 Stability & Availability | 🟠 Major | ⚡ Quick win Make the Azure nftables programming restart-safe.
Proposed fix:
  echo "Adding ICMP drop rule for '$3'"
- nft add element inet azure_icmp icmp_sources "{ $3 }"
+ nft add element inet azure_icmp icmp_sources "{ $3 }" 2>/dev/null || true
  EOF
  echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}"
+ nft delete table inet azure_icmp 2>/dev/null || true
  nft add table inet azure_icmp
Also applies to: 629-629 🤖 Prompt for AI Agents
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. nft add table is idempotent, and we run a flush after the add, which will correctly handle the restart |
||
| # | ||
| ip addr show | ||
| ip route show | ||
| iptables -nvL | ||
| iptables -nvL -t nat | ||
| oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh | ||
| #systemd-run -qPG -- oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh | ||
| nft list table inet azure_icmp | ||
|
coderabbitai[bot] marked this conversation as resolved.
|
||
| oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_nft_icmp.sh | ||
| lifecycle: | ||
| preStop: | ||
| exec: | ||
|
|
@@ -650,7 +644,6 @@ spec: | |
| - mountPath: /etc/ovn/ | ||
| name: etc-openvswitch | ||
| {{ end }} | ||
| # for the iptables wrapper | ||
| - mountPath: /host | ||
| name: host-slash | ||
| readOnly: true | ||
|
|
@@ -679,7 +672,7 @@ spec: | |
| - name: systemd-units | ||
| hostPath: | ||
| path: /etc/systemd/system | ||
| # used for iptables wrapper scripts | ||
| # used for nftables wrapper scripts | ||
| - name: host-slash | ||
| hostPath: | ||
| path: / | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -63,6 +63,11 @@ func GenerateKubeProxyConfiguration(args map[string]operv1.ProxyArgumentList) (s | |
| kpc.IPTables.SyncPeriod.Duration = ka.getDuration("iptables-sync-period") | ||
| kpc.IPTables.MinSyncPeriod.Duration = ka.getDuration("iptables-min-sync-period") | ||
|
|
||
| kpc.NFTables.MasqueradeBit = ka.getOptInt32("nftables-masquerade-bit") | ||
| kpc.NFTables.MasqueradeAll = ka.getBool("nftables-masquerade-all") | ||
| kpc.NFTables.SyncPeriod.Duration = ka.getDuration("nftables-sync-period") | ||
| kpc.NFTables.MinSyncPeriod.Duration = ka.getDuration("nftables-min-sync-period") | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. add handling for also, we should pre-emptively adjust the validation for |
||
|
|
||
| kpc.IPVS.SyncPeriod.Duration = ka.getDuration("ipvs-sync-period") | ||
| kpc.IPVS.MinSyncPeriod.Duration = ka.getDuration("ipvs-min-sync-period") | ||
| kpc.IPVS.Scheduler = ka.getString("ipvs-scheduler") | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
add a comment explaining that this is to clean up old iptables rules and can eventually go away