diff --git a/bindata/network/ovn-kubernetes/common/008-script-lib.yaml b/bindata/network/ovn-kubernetes/common/008-script-lib.yaml index b244e6b3f8..3ac1adc054 100644 --- a/bindata/network/ovn-kubernetes/common/008-script-lib.yaml +++ b/bindata/network/ovn-kubernetes/common/008-script-lib.yaml @@ -526,15 +526,21 @@ data: cni-bin-copy echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on geneve port" - iptables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK - iptables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK - ip6tables -t raw -A PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK - ip6tables -t raw -A OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK + iptables -t raw -D PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true + iptables -t raw -D OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true + ip6tables -t raw -D PREROUTING -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true + ip6tables -t raw -D OUTPUT -p udp --dport {{.GenevePort}} -j NOTRACK 2>/dev/null || true + nft add table inet ovn_notrack + nft flush table inet ovn_notrack + nft 'add chain inet ovn_notrack prerouting { type filter hook prerouting priority raw; policy accept; }' + nft 'add chain inet ovn_notrack output { type filter hook output priority raw; policy accept; }' + nft add rule inet ovn_notrack prerouting udp dport {{.GenevePort}} notrack + nft add rule inet ovn_notrack output udp dport {{.GenevePort}} notrack {{- if .OVNHybridOverlayVXLANPort}} echo "I$(date "+%m%d %H:%M:%S.%N") - disable conntrack on hybrid overlay VXLAN port" - iptables -t raw -A PREROUTING -p udp --dport {{.OVNHybridOverlayVXLANPort}} -j NOTRACK - iptables -t raw -A OUTPUT -p udp --dport {{.OVNHybridOverlayVXLANPort}} -j NOTRACK + nft add rule inet ovn_notrack prerouting udp dport {{.OVNHybridOverlayVXLANPort}} notrack + nft add rule inet ovn_notrack output udp dport {{.OVNHybridOverlayVXLANPort}} notrack {{- end}} echo "I$(date "+%m%d %H:%M:%S.%N") - starting ovnkube-node" diff --git a/bindata/network/ovn-kubernetes/managed/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/managed/ovnkube-node.yaml index f26e34709e..3ba52bb5d0 100644 --- a/bindata/network/ovn-kubernetes/managed/ovnkube-node.yaml +++ b/bindata/network/ovn-kubernetes/managed/ovnkube-node.yaml @@ -486,7 +486,7 @@ spec: - mountPath: /etc/systemd/system name: systemd-units readOnly: true - # for the iptables wrapper + # for the nftables wrapper - mountPath: /host name: host-slash readOnly: true @@ -597,41 +597,35 @@ spec: export KUBECONFIG=/var/run/ovnkube-kubeconfig {{ end }} - touch /var/run/ovn/add_iptables.sh - chmod 0755 /var/run/ovn/add_iptables.sh - cat <<'EOF' > /var/run/ovn/add_iptables.sh + touch /var/run/ovn/add_nft_icmp.sh + chmod 0755 /var/run/ovn/add_nft_icmp.sh + cat <<'EOF' > /var/run/ovn/add_nft_icmp.sh #!/bin/sh if [ -z "$3" ] then echo "Called with host address missing, ignore" exit 0 fi - echo "Adding ICMP drop rule for '$3' " - if iptables -C CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION - then - echo "iptables already set for $3" - else - iptables -A CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION - fi + echo "Adding ICMP drop rule for '$3'" + nft add element inet azure_icmp icmp_sources "{ $3 }" EOF echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}" - iptables -X CHECK_ICMP_SOURCE || true - iptables -N CHECK_ICMP_SOURCE || true - iptables -F CHECK_ICMP_SOURCE - iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE || true - iptables -I INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE - iptables -N ICMP_ACTION || true - iptables -F ICMP_ACTION - iptables -A ICMP_ACTION -j LOG - iptables -A ICMP_ACTION -j DROP + iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE 2>/dev/null || true + iptables -F CHECK_ICMP_SOURCE 2>/dev/null || true + iptables -X CHECK_ICMP_SOURCE 2>/dev/null || true + iptables -F ICMP_ACTION 2>/dev/null || true + iptables -X ICMP_ACTION 2>/dev/null || true + nft add table inet azure_icmp + nft flush table inet azure_icmp + nft 'add set inet azure_icmp icmp_sources { type ipv4_addr; }' + nft 'add chain inet azure_icmp input { type filter hook input priority 0; policy accept; }' + nft add rule inet azure_icmp input icmp type destination-unreachable icmp code frag-needed ip saddr @icmp_sources counter log drop # ip addr show ip route show - iptables -nvL - iptables -nvL -t nat - oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh - #systemd-run -qPG -- oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh + nft list table inet azure_icmp + oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_nft_icmp.sh lifecycle: preStop: exec: @@ -644,7 +638,6 @@ spec: - mountPath: /etc/ovn/ name: etc-openvswitch {{ end }} - # for the iptables wrapper - mountPath: /host name: host-slash readOnly: true @@ -673,7 +666,7 @@ spec: - name: systemd-units hostPath: path: /etc/systemd/system - # used for iptables wrapper scripts + # used for nftables wrapper scripts - name: host-slash hostPath: path: / diff --git a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml b/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml index 18f52c983c..4e5a62aa17 100644 --- a/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml +++ b/bindata/network/ovn-kubernetes/self-hosted/ovnkube-node.yaml @@ -510,7 +510,7 @@ spec: - mountPath: /etc/systemd/system name: systemd-units readOnly: true - # for the iptables wrapper + # for the nftables wrapper - mountPath: /host name: host-slash readOnly: true @@ -603,41 +603,35 @@ spec: export KUBECONFIG=/etc/ovn/kubeconfig {{ end }} - touch /var/run/ovn/add_iptables.sh - chmod 0755 /var/run/ovn/add_iptables.sh - cat <<'EOF' > /var/run/ovn/add_iptables.sh + touch /var/run/ovn/add_nft_icmp.sh + chmod 0755 /var/run/ovn/add_nft_icmp.sh + cat <<'EOF' > /var/run/ovn/add_nft_icmp.sh #!/bin/sh if [ -z "$3" ] then echo "Called with host address missing, ignore" exit 0 fi - echo "Adding ICMP drop rule for '$3' " - if iptables -C CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION - then - echo "iptables already set for $3" - else - iptables -A CHECK_ICMP_SOURCE -p icmp -s $3 -j ICMP_ACTION - fi + echo "Adding ICMP drop rule for '$3'" + nft add element inet azure_icmp icmp_sources "{ $3 }" EOF echo "I$(date "+%m%d %H:%M:%S.%N") - drop-icmp - start drop-icmp ${K8S_NODE}" - iptables -X CHECK_ICMP_SOURCE || true - iptables -N CHECK_ICMP_SOURCE || true - iptables -F CHECK_ICMP_SOURCE - iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE || true - iptables -I INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE - iptables -N ICMP_ACTION || true - iptables -F ICMP_ACTION - iptables -A ICMP_ACTION -j LOG - iptables -A ICMP_ACTION -j DROP + iptables -D INPUT -p icmp --icmp-type fragmentation-needed -j CHECK_ICMP_SOURCE 2>/dev/null || true + iptables -F CHECK_ICMP_SOURCE 2>/dev/null || true + iptables -X CHECK_ICMP_SOURCE 2>/dev/null || true + iptables -F ICMP_ACTION 2>/dev/null || true + iptables -X ICMP_ACTION 2>/dev/null || true + nft add table inet azure_icmp + nft flush table inet azure_icmp + nft 'add set inet azure_icmp icmp_sources { type ipv4_addr; }' + nft 'add chain inet azure_icmp input { type filter hook input priority 0; policy accept; }' + nft add rule inet azure_icmp input icmp type destination-unreachable icmp code frag-needed ip saddr @icmp_sources counter log drop # ip addr show ip route show - iptables -nvL - iptables -nvL -t nat - oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh - #systemd-run -qPG -- oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_iptables.sh + nft list table inet azure_icmp + oc observe pods -n openshift-ovn-kubernetes --listen-addr='' -l app=ovnkube-node -a '{ .status.hostIP }' -- /var/run/ovn/add_nft_icmp.sh lifecycle: preStop: exec: @@ -650,7 +644,6 @@ spec: - mountPath: /etc/ovn/ name: etc-openvswitch {{ end }} - # for the iptables wrapper - mountPath: /host name: host-slash readOnly: true @@ -679,7 +672,7 @@ spec: - name: systemd-units hostPath: path: /etc/systemd/system - # used for iptables wrapper scripts + # used for nftables wrapper scripts - name: host-slash hostPath: path: / diff --git a/pkg/util/k8s/kubeproxy.go b/pkg/util/k8s/kubeproxy.go index 3099a9a7b1..9f11e9445d 100644 --- a/pkg/util/k8s/kubeproxy.go +++ b/pkg/util/k8s/kubeproxy.go @@ -63,6 +63,11 @@ func GenerateKubeProxyConfiguration(args map[string]operv1.ProxyArgumentList) (s kpc.IPTables.SyncPeriod.Duration = ka.getDuration("iptables-sync-period") kpc.IPTables.MinSyncPeriod.Duration = ka.getDuration("iptables-min-sync-period") + kpc.NFTables.MasqueradeBit = ka.getOptInt32("nftables-masquerade-bit") + kpc.NFTables.MasqueradeAll = ka.getBool("nftables-masquerade-all") + kpc.NFTables.SyncPeriod.Duration = ka.getDuration("nftables-sync-period") + kpc.NFTables.MinSyncPeriod.Duration = ka.getDuration("nftables-min-sync-period") + kpc.IPVS.SyncPeriod.Duration = ka.getDuration("ipvs-sync-period") kpc.IPVS.MinSyncPeriod.Duration = ka.getDuration("ipvs-min-sync-period") kpc.IPVS.Scheduler = ka.getString("ipvs-scheduler") diff --git a/pkg/util/k8s/kubeproxy_test.go b/pkg/util/k8s/kubeproxy_test.go index f1a8f30e85..2b2a1371f7 100644 --- a/pkg/util/k8s/kubeproxy_test.go +++ b/pkg/util/k8s/kubeproxy_test.go @@ -396,6 +396,85 @@ nodePortAddresses: null oomScoreAdj: null portRange: 1000+10 showHiddenMetricsForVersion: "" +winkernel: + enableDSR: false + forwardHealthCheckVip: false + networkName: "" + rootHnsEndpointName: "" + sourceVip: "" +`, + }, + { + description: "nftables overrides", + overrides: map[string]operv1.ProxyArgumentList{ + "proxy-mode": {"nftables"}, + "nftables-masquerade-bit": {"14"}, + "nftables-masquerade-all": {"true"}, + "nftables-sync-period": {"30s"}, + "nftables-min-sync-period": {"10s"}, + }, + output: ` +apiVersion: kubeproxy.config.k8s.io/v1alpha1 +bindAddress: 0.0.0.0 +bindAddressHardFail: false +clientConnection: + acceptContentTypes: "" + burst: 0 + contentType: "" + kubeconfig: "" + qps: 0 +clusterCIDR: "" +configSyncPeriod: 0s +conntrack: + maxPerCore: null + min: null + tcpBeLiberal: false + tcpCloseWaitTimeout: null + tcpEstablishedTimeout: null + udpStreamTimeout: 0s + udpTimeout: 0s +detectLocal: + bridgeInterface: "" + interfaceNamePrefix: "" +detectLocalMode: "" +enableProfiling: false +healthzBindAddress: "" +hostnameOverride: "" +iptables: + localhostNodePorts: null + masqueradeAll: false + masqueradeBit: 0 + minSyncPeriod: 0s + syncPeriod: 0s +ipvs: + excludeCIDRs: null + minSyncPeriod: 0s + scheduler: "" + strictARP: false + syncPeriod: 0s + tcpFinTimeout: 0s + tcpTimeout: 0s + udpTimeout: 0s +kind: KubeProxyConfiguration +logging: + flushFrequency: 0 + options: + json: + infoBufferSize: "0" + text: + infoBufferSize: "0" + verbosity: 0 +metricsBindAddress: 0.0.0.0:9102 +mode: nftables +nftables: + masqueradeAll: true + masqueradeBit: 14 + minSyncPeriod: 10s + syncPeriod: 30s +nodePortAddresses: null +oomScoreAdj: null +portRange: "" +showHiddenMetricsForVersion: "" winkernel: enableDSR: false forwardHealthCheckVip: false