diff --git a/api/v1/clusterwidenetworkpolicy_types.go b/api/v1/clusterwidenetworkpolicy_types.go index 421855da..e291f011 100644 --- a/api/v1/clusterwidenetworkpolicy_types.go +++ b/api/v1/clusterwidenetworkpolicy_types.go @@ -121,6 +121,12 @@ type EgressRule struct { // ToFQDNs rules can't contain To rules. // +optional ToFQDNs []FQDNSelector `json:"toFQDNs,omitempty"` + + // Maximum MSS size for outgoing traffic to the egress destination + // If specified nftables rules for TCP-MSS clamping will be created. + // If none specified no rule will be applied. + // +optional + TcpMss *uint16 `json:"tcpmss,omitempty"` } // FQDNSelector describes rules for matching DNS names. diff --git a/api/v1/zz_generated.deepcopy.go b/api/v1/zz_generated.deepcopy.go index 72826954..23f85d8e 100644 --- a/api/v1/zz_generated.deepcopy.go +++ b/api/v1/zz_generated.deepcopy.go @@ -91,6 +91,11 @@ func (in *EgressRule) DeepCopyInto(out *EgressRule) { *out = make([]FQDNSelector, len(*in)) copy(*out, *in) } + if in.TcpMss != nil { + in, out := &in.TcpMss, &out.TcpMss + *out = new(uint16) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new EgressRule. diff --git a/config/crd/bases/metal-stack.io_clusterwidenetworkpolicies.yaml b/config/crd/bases/metal-stack.io_clusterwidenetworkpolicies.yaml index b7717485..37e2d514 100644 --- a/config/crd/bases/metal-stack.io_clusterwidenetworkpolicies.yaml +++ b/config/crd/bases/metal-stack.io_clusterwidenetworkpolicies.yaml @@ -90,6 +90,11 @@ spec: type: string type: object type: array + tcpmss: + description: Maximum MSS size for outgoing traffic to the egress + destination If specified nftables rules for TCP-MSS clamping + will be created. If none specified no rule will be applied. + type: integer to: description: List of destinations for outgoing traffic of a cluster for this rule. 
Items in this list are combined using diff --git a/pkg/nftables/firewall.go b/pkg/nftables/firewall.go index 28066b0d..9a4af55c 100644 --- a/pkg/nftables/firewall.go +++ b/pkg/nftables/firewall.go @@ -65,6 +65,7 @@ type nftablesRules []string type forwardingRules struct { Ingress nftablesRules Egress nftablesRules + TcpMss nftablesRules } // NewDefaultFirewall creates a new default nftables firewall. diff --git a/pkg/nftables/networkpolicy.go b/pkg/nftables/networkpolicy.go index 1567867c..4597fbb5 100644 --- a/pkg/nftables/networkpolicy.go +++ b/pkg/nftables/networkpolicy.go @@ -12,6 +12,8 @@ import ( type ruleBase struct { comment string base []string + baseout []string + basein []string } // clusterwideNetworkPolicyRules generates nftables rules for a clusterwidenetworkpolicy @@ -19,11 +21,11 @@ func clusterwideNetworkPolicyRules( cache FQDNCache, np firewallv1.ClusterwideNetworkPolicy, logAcceptedConnections bool, -) (ingress nftablesRules, egress nftablesRules, updated firewallv1.ClusterwideNetworkPolicy) { +) (ingress nftablesRules, egress nftablesRules, tcpmss nftablesRules, updated firewallv1.ClusterwideNetworkPolicy) { updated = np if len(np.Spec.Egress) > 0 { - egress, updated = clusterwideNetworkPolicyEgressRules(cache, np, logAcceptedConnections) + egress, tcpmss, updated = clusterwideNetworkPolicyEgressRules(cache, np, logAcceptedConnections) } if len(np.Spec.Ingress) > 0 { ingress = append(ingress, clusterwideNetworkPolicyIngressRules(np, logAcceptedConnections)...) 
@@ -64,22 +66,29 @@ func clusterwideNetworkPolicyEgressRules( cache FQDNCache, np firewallv1.ClusterwideNetworkPolicy, logAcceptedConnections bool, -) (rules nftablesRules, updated firewallv1.ClusterwideNetworkPolicy) { +) (rules nftablesRules, tcpmss nftablesRules, updated firewallv1.ClusterwideNetworkPolicy) { for _, e := range np.Spec.Egress { tcpPorts, udpPorts := calculatePorts(e.Ports) + ruleBases := []ruleBase{} if len(e.To) > 0 { allow, except := clusterwideNetworkPolicyEgressToRules(e) rb := []string{"ip saddr == @cluster_prefixes"} + rbmssout := []string{""} + rbmssin := []string{""} if len(except) > 0 { rb = append(rb, fmt.Sprintf("ip daddr != { %s }", strings.Join(except, ", "))) + rbmssout = append(rbmssout, fmt.Sprintf("ip daddr != { %s }", strings.Join(except, ", "))) + rbmssin = append(rbmssin, fmt.Sprintf("ip saddr != { %s }", strings.Join(except, ", "))) } if len(allow) > 0 { if allow[0] != "0.0.0.0/0" { rb = append(rb, fmt.Sprintf("ip daddr { %s }", strings.Join(allow, ", "))) + rbmssout = append(rbmssout, fmt.Sprintf("ip daddr { %s }", strings.Join(allow, ", "))) + rbmssin = append(rbmssin, fmt.Sprintf("ip saddr { %s }", strings.Join(allow, ", "))) } } - ruleBases = append(ruleBases, ruleBase{base: rb}) + ruleBases = append(ruleBases, ruleBase{base: rb, baseout: rbmssout, basein: rbmssin}) } else if len(e.ToFQDNs) > 0 && cache.IsInitialized() { // Generate allow rules based on DNS selectors rbs, u := clusterwideNetworkPolicyEgressToFQDNRules(cache, np.Status.FQDNState, e) @@ -91,6 +100,15 @@ func clusterwideNetworkPolicyEgressRules( for _, rb := range ruleBases { if len(tcpPorts) > 0 { rules = append(rules, assembleDestinationPortRule(rb.base, "tcp", tcpPorts, logAcceptedConnections, comment+" tcp"+rb.comment)) + if e.TcpMss != nil { + tcpmss = append(tcpmss, fmt.Sprintf("%s tcp dport { %s } tcp flags syn tcp option maxseg size set %d", strings.Join(rb.baseout, " "), strings.Join(tcpPorts, ", "), *e.TcpMss)) + tcpmss = append(tcpmss, fmt.Sprintf("%s tcp sport { %s } tcp flags syn tcp 
option maxseg size set %d", strings.Join(rb.basein, " "), strings.Join(tcpPorts, ", "), *e.TcpMss)) + } + } else { + if e.TcpMss != nil { + tcpmss = append(tcpmss, fmt.Sprintf("%s tcp flags syn tcp option maxseg size set %d", strings.Join(rb.baseout, " "), *e.TcpMss)) + tcpmss = append(tcpmss, fmt.Sprintf("%s tcp flags syn tcp option maxseg size set %d", strings.Join(rb.basein, " "), *e.TcpMss)) + } } if len(udpPorts) > 0 { rules = append(rules, assembleDestinationPortRule(rb.base, "udp", udpPorts, logAcceptedConnections, comment+" udp"+rb.comment)) @@ -98,7 +116,7 @@ func clusterwideNetworkPolicyEgressRules( } } - return uniqueSorted(rules), np + return uniqueSorted(rules), uniqueSorted(tcpmss), np } func clusterwideNetworkPolicyEgressToRules(e firewallv1.EgressRule) (allow, except []string) { diff --git a/pkg/nftables/networkpolicy_test.go b/pkg/nftables/networkpolicy_test.go index 4f9dd851..c415c558 100644 --- a/pkg/nftables/networkpolicy_test.go +++ b/pkg/nftables/networkpolicy_test.go @@ -26,8 +26,10 @@ func TestClusterwideNetworkPolicyRules(t *testing.T) { type want struct { ingress nftablesRules egress nftablesRules + tcpmss nftablesRules ingressAL nftablesRules egressAL nftablesRules + tcpmssAL nftablesRules } tests := []struct { @@ -98,6 +100,7 @@ func TestClusterwideNetworkPolicyRules(t *testing.T) { `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } tcp dport { 53, 443-448 } counter accept comment "accept traffic for np tcp"`, `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } udp dport { 53 } counter accept comment "accept traffic for np udp"`, }, + tcpmss: nftablesRules{}, ingressAL: nftablesRules{ `ip saddr != { 1.1.0.1 } ip saddr { 1.1.0.0/24 } tcp dport { 80, 443-448 } log prefix "nftables-firewall-accepted: " limit rate 10/second`, `ip saddr != { 1.1.0.1 } ip saddr { 1.1.0.0/24 } tcp dport { 80, 443-448 } counter accept comment "accept traffic for k8s network policy tcp"`, @@ -108,27 +111,34 @@ func 
TestClusterwideNetworkPolicyRules(t *testing.T) { `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } udp dport { 53 } log prefix "nftables-firewall-accepted: " limit rate 10/second`, `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } udp dport { 53 } counter accept comment "accept traffic for np udp"`, }, + tcpmssAL: nftablesRules{}, }, }, } for _, tt := range tests { tt := tt t.Run(tt.name, func(t *testing.T) { - ingress, egress, _ := clusterwideNetworkPolicyRules(nil, tt.input, false) + ingress, egress, tcpmss, _ := clusterwideNetworkPolicyRules(nil, tt.input, false) if !cmp.Equal(ingress, tt.want.ingress) { t.Errorf("clusterwideNetworkPolicyRules() ingress diff: %v", cmp.Diff(ingress, tt.want.ingress)) } if !cmp.Equal(egress, tt.want.egress) { t.Errorf("clusterwideNetworkPolicyRules() egress diff: %v", cmp.Diff(egress, tt.want.egress)) } + if !cmp.Equal(tcpmss, tt.want.tcpmss) { + t.Errorf("clusterwideNetworkPolicyRules() tcpmss diff: %v", cmp.Diff(tcpmss, tt.want.tcpmss)) + } - ingressAL, egressAL, _ := clusterwideNetworkPolicyRules(nil, tt.input, true) + ingressAL, egressAL, tcpmssAL, _ := clusterwideNetworkPolicyRules(nil, tt.input, true) if !cmp.Equal(ingressAL, tt.want.ingressAL) { t.Errorf("clusterwideNetworkPolicyRules() ingress with accessLog diff: %v", cmp.Diff(ingressAL, tt.want.ingressAL)) } if !cmp.Equal(egressAL, tt.want.egressAL) { t.Errorf("clusterwideNetworkPolicyRules() egress with accessLog diff: %v", cmp.Diff(egressAL, tt.want.egressAL)) } + if !cmp.Equal(tcpmssAL, tt.want.tcpmssAL) { + t.Errorf("clusterwideNetworkPolicyRules() tcpmss with accessLog diff: %v", cmp.Diff(tcpmssAL, tt.want.tcpmssAL)) + } }) } } @@ -139,7 +149,9 @@ func TestClusterwideNetworkPolicyEgressRules(t *testing.T) { type want struct { egress nftablesRules + tcpmss nftablesRules egressAL nftablesRules + tcpmssAL nftablesRules } tests := []struct { @@ -183,12 +195,14 @@ func 
TestClusterwideNetworkPolicyEgressRules(t *testing.T) { `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } tcp dport { 53 } counter accept comment "accept traffic for np tcp"`, `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } udp dport { 53 } counter accept comment "accept traffic for np udp"`, }, + tcpmss: nftablesRules{}, egressAL: nftablesRules{ `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } tcp dport { 53 } log prefix "nftables-firewall-accepted: " limit rate 10/second`, `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } tcp dport { 53 } counter accept comment "accept traffic for np tcp"`, `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } udp dport { 53 } log prefix "nftables-firewall-accepted: " limit rate 10/second`, `ip saddr == @cluster_prefixes ip daddr != { 1.1.0.1 } ip daddr { 1.1.0.0/24, 1.1.1.0/24 } udp dport { 53 } counter accept comment "accept traffic for np udp"`, }, + tcpmssAL: nftablesRules{}, }, }, { @@ -240,6 +254,7 @@ func TestClusterwideNetworkPolicyEgressRules(t *testing.T) { `ip saddr == @cluster_prefixes ip6 daddr @test2 tcp dport { 53 } counter accept comment "accept traffic for np tcp, fqdn: *.test.com"`, `ip saddr == @cluster_prefixes ip6 daddr @test2 udp dport { 53 } counter accept comment "accept traffic for np udp, fqdn: *.test.com"`, }, + tcpmss: nftablesRules{}, }, }, } @@ -253,17 +268,23 @@ func TestClusterwideNetworkPolicyEgressRules(t *testing.T) { t.Run(tt.name, func(t *testing.T) { tt.record(fqdnCache) if len(tt.want.egress) > 0 { - egress, _ := clusterwideNetworkPolicyEgressRules(fqdnCache, tt.input, false) + egress, tcpmss, _ := clusterwideNetworkPolicyEgressRules(fqdnCache, tt.input, false) if !cmp.Equal(egress, tt.want.egress) { t.Errorf("clusterwideNetworkPolicyEgressRules() diff: %v", cmp.Diff(egress, tt.want.egress)) 
} + if !cmp.Equal(tcpmss, tt.want.tcpmss) { + t.Errorf("clusterwideNetworkPolicyEgressRules() diff: %v", cmp.Diff(tcpmss, tt.want.tcpmss)) + } } if len(tt.want.egressAL) > 0 { - egressAL, _ := clusterwideNetworkPolicyEgressRules(fqdnCache, tt.input, true) + egressAL, tcpmssAL, _ := clusterwideNetworkPolicyEgressRules(fqdnCache, tt.input, true) if !cmp.Equal(egressAL, tt.want.egressAL) { t.Errorf("clusterwideNetworkPolicyEgressRules() with accessLog diff: %v", cmp.Diff(egressAL, tt.want.egressAL)) } + if !cmp.Equal(tcpmssAL, tt.want.tcpmssAL) { + t.Errorf("clusterwideNetworkPolicyEgressRules() with accessLog diff: %v", cmp.Diff(tcpmssAL, tt.want.tcpmssAL)) + } } }) } diff --git a/pkg/nftables/nftables.tpl b/pkg/nftables/nftables.tpl index 5581c822..4c2b3b74 100644 --- a/pkg/nftables/nftables.tpl +++ b/pkg/nftables/nftables.tpl @@ -45,12 +45,17 @@ table inet firewall { # network traffic accounting for internal traffic ip saddr @internal_prefixes oifname {"vlan{{ .PrivateVrfID }}", "vrf{{ .PrivateVrfID }}"} counter name internal_in comment "count internal traffic incomming" ip daddr @internal_prefixes iifname {"vlan{{ .PrivateVrfID }}", "vrf{{ .PrivateVrfID }}"} counter name internal_out comment "count internal traffic outgoing" - +{{ if gt (len .RateLimitRules) 0 }} # rate limits {{- range .RateLimitRules }} {{ . }} {{- end }} - +{{ end }}{{ if gt (len .ForwardingRules.TcpMss) 0 }} + # TCP-MSS clamping + {{- range .ForwardingRules.TcpMss }} + {{ . 
}} + {{- end }} +{{ end }} # state dependent rules ct state established,related counter accept comment "accept established connections" ct state invalid counter drop comment "drop packets with invalid ct state" diff --git a/pkg/nftables/rendering.go b/pkg/nftables/rendering.go index 8f75caba..4cd85a17 100644 --- a/pkg/nftables/rendering.go +++ b/pkg/nftables/rendering.go @@ -22,16 +22,17 @@ type firewallRenderingData struct { } func newFirewallRenderingData(f *Firewall) (*firewallRenderingData, error) { - ingress, egress := nftablesRules{}, nftablesRules{} + ingress, egress, tcpmss := nftablesRules{}, nftablesRules{}, nftablesRules{} for ind, np := range f.clusterwideNetworkPolicies.Items { err := np.Spec.Validate() if err != nil { continue } - i, e, u := clusterwideNetworkPolicyRules(f.cache, np, f.logAcceptedConnections) + i, e, t, u := clusterwideNetworkPolicyRules(f.cache, np, f.logAcceptedConnections) ingress = append(ingress, i...) egress = append(egress, e...) + tcpmss = append(tcpmss, t...) 
f.clusterwideNetworkPolicies.Items[ind] = u } @@ -54,6 +55,7 @@ func newFirewallRenderingData(f *Firewall) (*firewallRenderingData, error) { ForwardingRules: forwardingRules{ Ingress: ingress, Egress: egress, + TcpMss: tcpmss, }, RateLimitRules: rateLimitRules(f), SnatRules: snatRules, diff --git a/pkg/nftables/test_data/validated.nftable.v4 b/pkg/nftables/test_data/validated.nftable.v4 index 154bddb1..ce087b27 100644 --- a/pkg/nftables/test_data/validated.nftable.v4 +++ b/pkg/nftables/test_data/validated.nftable.v4 @@ -37,8 +37,6 @@ table inet firewall { ip saddr @internal_prefixes oifname {"vlan42", "vrf42"} counter name internal_in comment "count internal traffic incomming" ip daddr @internal_prefixes iifname {"vlan42", "vrf42"} counter name internal_out comment "count internal traffic outgoing" - # rate limits - # state dependent rules ct state established,related counter accept comment "accept established connections" ct state invalid counter drop comment "drop packets with invalid ct state"