From 2bb2b66f957752adddfbfd55894badf30089fba4 Mon Sep 17 00:00:00 2001 From: Tom Date: Mon, 28 Jan 2019 20:56:40 -0800 Subject: [PATCH] Implement support for anonymous & named sets. --- .travis.yml | 4 +- expr/lookup.go | 60 +++++++ nftables.go | 34 ++-- nftables_test.go | 283 ++++++++++++++++++++++++++++++++ set.go | 414 +++++++++++++++++++++++++++++++++++++++++++++++ 5 files changed, 771 insertions(+), 24 deletions(-) create mode 100644 expr/lookup.go create mode 100644 set.go diff --git a/.travis.yml b/.travis.yml index 91114f5..62afe6e 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,6 +1,6 @@ # Use the (faster) container-based infrastructure, see also # http://docs.travis-ci.com/user/workers/container-based-infrastructure/ -sudo: false +sudo: true dist: trusty language: go @@ -14,3 +14,5 @@ script: - "gosrc=$(find . -name '*.go' | tr '\\n' ' '); [ $(gofmt -l $gosrc 2>&- | wc -l) -eq 0 ] || (echo 'gofmt was not run on these files:'; gofmt -l $gosrc 2>&-; false)" - go tool vet . - go test ./... + - go test -c github.com/google/nftables + - sudo ./nftables.test -test.v -run_system_tests diff --git a/expr/lookup.go b/expr/lookup.go new file mode 100644 index 0000000..50dd5fc --- /dev/null +++ b/expr/lookup.go @@ -0,0 +1,60 @@ +// Copyright 2018 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +package expr + +import ( + "fmt" + + "github.com/google/nftables/binaryutil" + "github.com/mdlayher/netlink" + "golang.org/x/sys/unix" +) + +// Lookup represents a match against the contents of a set. +type Lookup struct { + SourceRegister uint32 + DestRegister uint32 + + SetID uint32 + SetName string +} + +func (e *Lookup) marshal() ([]byte, error) { + // See: https://git.netfilter.org/libnftnl/tree/src/expr/lookup.c?id=6dc1c3d8bb64077da7f3f28c7368fb087d10a492#n115 + var opAttrs []netlink.Attribute + if e.SourceRegister != 0 { + opAttrs = append(opAttrs, netlink.Attribute{Type: unix.NFTA_LOOKUP_SREG, Data: binaryutil.BigEndian.PutUint32(e.SourceRegister)}) + } + if e.DestRegister != 0 { + opAttrs = append(opAttrs, netlink.Attribute{Type: unix.NFTA_LOOKUP_DREG, Data: binaryutil.BigEndian.PutUint32(e.DestRegister)}) + } + opAttrs = append(opAttrs, + netlink.Attribute{Type: unix.NFTA_LOOKUP_SET, Data: []byte(e.SetName + "\x00")}, + netlink.Attribute{Type: unix.NFTA_LOOKUP_SET_ID, Data: binaryutil.BigEndian.PutUint32(e.SetID)}, + ) + opData, err := netlink.MarshalAttributes(opAttrs) + if err != nil { + return nil, err + } + + return netlink.MarshalAttributes([]netlink.Attribute{ + {Type: unix.NFTA_EXPR_NAME, Data: []byte("lookup\x00")}, + {Type: unix.NLA_F_NESTED | unix.NFTA_EXPR_DATA, Data: opData}, + }) +} + +func (e *Lookup) unmarshal(data []byte) error { + return fmt.Errorf("not yet implemented") +} diff --git a/nftables.go b/nftables.go index 3c93f65..6fd9649 100644 --- a/nftables.go +++ b/nftables.go @@ -313,6 +313,7 @@ type Table struct { // Commands are buffered. Flush sends all buffered commands in a single batch. type Conn struct { TestDial nltest.Func // for testing only; passed to nltest.Dial + NetNS int // Network namespace netlink will interact with. 
messages []netlink.Message err error } @@ -366,13 +367,7 @@ func (cc *Conn) Flush() error { if cc.err != nil { return cc.err // serialization error } - var conn *netlink.Conn - var err error - if cc.TestDial == nil { - conn, err = netlink.Dial(unix.NETLINK_NETFILTER, nil) - } else { - conn = nltest.Dial(cc.TestDial) - } + conn, err := cc.dialNetlink() if err != nil { return err } @@ -394,17 +389,10 @@ func (cc *Conn) Flush() error { // GetRule returns the rules in the specified table and chain. func (cc *Conn) GetRule(t *Table, c *Chain) ([]*Rule, error) { - var conn *netlink.Conn - var err error - if cc.TestDial == nil { - conn, err = netlink.Dial(unix.NETLINK_NETFILTER, nil) - } else { - conn = nltest.Dial(cc.TestDial) - } + conn, err := cc.dialNetlink() if err != nil { return nil, err } - defer conn.Close() data, err := netlink.MarshalAttributes([]netlink.Attribute{ @@ -566,18 +554,18 @@ func objFromMsg(msg netlink.Message) (Obj, error) { return nil, fmt.Errorf("malformed stateful object") } -func (cc *Conn) getObj(o Obj, msgType uint16) ([]Obj, error) { - var conn *netlink.Conn - var err error - if cc.TestDial == nil { - conn, err = netlink.Dial(unix.NETLINK_NETFILTER, nil) - } else { - conn = nltest.Dial(cc.TestDial) +func (cc *Conn) dialNetlink() (*netlink.Conn, error) { + if cc.TestDial != nil { + return nltest.Dial(cc.TestDial), nil } + return netlink.Dial(unix.NETLINK_NETFILTER, &netlink.Config{NetNS: cc.NetNS}) +} + +func (cc *Conn) getObj(o Obj, msgType uint16) ([]Obj, error) { + conn, err := cc.dialNetlink() if err != nil { return nil, err } - defer conn.Close() data, err := o.marshal(false) diff --git a/nftables_test.go b/nftables_test.go index 0b89675..04cdafd 100644 --- a/nftables_test.go +++ b/nftables_test.go @@ -16,8 +16,10 @@ package nftables_test import ( "bytes" + "flag" "fmt" "net" + "runtime" "strings" "testing" @@ -25,9 +27,14 @@ import ( "github.com/google/nftables/binaryutil" "github.com/google/nftables/expr" "github.com/mdlayher/netlink" 
+ "github.com/vishvananda/netns" "golang.org/x/sys/unix" ) +var ( + enableSysTests = flag.Bool("run_system_tests", false, "Run tests that operate against the live kernel") +) + // nfdump returns a hexdump of 4 bytes per line (like nft --debug=all), allowing // users to make sense of large byte literals more easily. func nfdump(b []byte) string { @@ -74,6 +81,35 @@ func ifname(n string) []byte { return b } +// openSystemNFTConn returns a netlink connection that tests against +// the running kernel in a separate network namespace. +// cleanupSystemNFTConn() must be called from a defer to clean up the +// created network namespace. +func openSystemNFTConn(t *testing.T) (*nftables.Conn, netns.NsHandle) { + t.Helper() + if !*enableSysTests { + t.SkipNow() + } + // We lock the goroutine into the current thread, as namespace operations + // such as those invoked by `netns.New()` are thread-local. This is undone + // in cleanupSystemNFTConn(). + runtime.LockOSThread() + + ns, err := netns.New() + if err != nil { + t.Fatalf("netns.New() failed: %v", err) + } + return &nftables.Conn{NetNS: int(ns)}, ns +} + +func cleanupSystemNFTConn(t *testing.T, newNS netns.NsHandle) { + defer runtime.UnlockOSThread() + + if err := newNS.Close(); err != nil { + t.Fatalf("newNS.Close() failed: %v", err) + } +} + func TestConfigureNAT(t *testing.T) { // The want byte sequences come from stracing nft(8), e.g.: // strace -f -v -x -s 2048 -eraw=sendto nft add table ip nat @@ -829,3 +865,250 @@ func TestDropVerdict(t *testing.T) { t.Fatal(err) } } + +func TestCreateUseAnonymousSet(t *testing.T) { + // The want byte sequences come from stracing nft(8), e.g.: + // strace -f -v -x -s 2048 -eraw=sendto nft add table ip nat + // + // The nft(8) command sequence was taken from: + // https://wiki.nftables.org/wiki-nftables/index.php/Mangle_TCP_options + want := [][]byte{ + // batch begin + []byte("\x00\x00\x00\x0a"), + // nft flush ruleset + []byte("\x00\x00\x00\x00"), + // nft add table ip filter + 
[]byte("\x02\x00\x00\x00\x0b\x00\x01\x00\x66\x69\x6c\x74\x65\x72\x00\x00\x08\x00\x02\x00\x00\x00\x00\x00"), + // Create anonymous set with key len of 2 bytes and data len of 0 bytes + []byte("\x02\x00\x00\x00\x0b\x00\x01\x00\x66\x69\x6c\x74\x65\x72\x00\x00\x0c\x00\x02\x00\x5f\x5f\x73\x65\x74\x25\x64\x00\x08\x00\x03\x00\x00\x00\x00\x03\x08\x00\x04\x00\x00\x00\x00\x0d\x08\x00\x05\x00\x00\x00\x00\x02\x08\x00\x0a\x00\x00\x00\x00\x01\x0c\x00\x09\x80\x08\x00\x01\x00\x00\x00\x00\x02\x0a\x00\x0d\x00\x00\x04\x02\x00\x00\x00\x00\x00"), + // Assign the two values to the aforementioned anonymous set + []byte("\x02\x00\x00\x00\x0c\x00\x02\x00\x5f\x5f\x73\x65\x74\x25\x64\x00\x08\x00\x04\x00\x00\x00\x00\x01\x0b\x00\x01\x00\x66\x69\x6c\x74\x65\x72\x00\x00\x24\x00\x03\x80\x10\x00\x01\x80\x0c\x00\x01\x80\x06\x00\x01\x00\x00\x45\x00\x00\x10\x00\x02\x80\x0c\x00\x01\x80\x06\x00\x01\x00\x04\x8b\x00\x00"), + // nft add rule filter forward tcp dport {69, 1163} drop + []byte("\x02\x00\x00\x00\x0b\x00\x01\x00\x66\x69\x6c\x74\x65\x72\x00\x00\x0c\x00\x02\x00\x66\x6f\x72\x77\x61\x72\x64\x00\xe8\x00\x04\x80\x24\x00\x01\x80\x09\x00\x01\x00\x6d\x65\x74\x61\x00\x00\x00\x00\x14\x00\x02\x80\x08\x00\x02\x00\x00\x00\x00\x10\x08\x00\x01\x00\x00\x00\x00\x01\x2c\x00\x01\x80\x08\x00\x01\x00\x63\x6d\x70\x00\x20\x00\x02\x80\x08\x00\x01\x00\x00\x00\x00\x01\x08\x00\x02\x00\x00\x00\x00\x00\x0c\x00\x03\x80\x05\x00\x01\x00\x06\x00\x00\x00\x34\x00\x01\x80\x0c\x00\x01\x00\x70\x61\x79\x6c\x6f\x61\x64\x00\x24\x00\x02\x80\x08\x00\x01\x00\x00\x00\x00\x01\x08\x00\x02\x00\x00\x00\x00\x02\x08\x00\x03\x00\x00\x00\x00\x02\x08\x00\x04\x00\x00\x00\x00\x02\x30\x00\x01\x80\x0b\x00\x01\x00\x6c\x6f\x6f\x6b\x75\x70\x00\x00\x20\x00\x02\x80\x08\x00\x02\x00\x00\x00\x00\x01\x0c\x00\x01\x00\x5f\x5f\x73\x65\x74\x25\x64\x00\x08\x00\x04\x00\x00\x00\x00\x01\x30\x00\x01\x80\x0e\x00\x01\x00\x69\x6d\x6d\x65\x64\x69\x61\x74\x65\x00\x00\x00\x1c\x00\x02\x80\x08\x00\x01\x00\x00\x00\x00\x00\x10\x00\x02\x80\x0c\x00\x02\x80\x08\x00\x01\x00\x00\x00\x
00\x00"), + // batch end + []byte("\x00\x00\x00\x0a"), + } + + c := &nftables.Conn{ + TestDial: func(req []netlink.Message) ([]netlink.Message, error) { + for idx, msg := range req { + b, err := msg.MarshalBinary() + if err != nil { + t.Fatal(err) + } + if len(b) < 16 { + continue + } + b = b[16:] + if len(want) == 0 { + t.Errorf("no want entry for message %d: %x", idx, b) + continue + } + if got, want := b, want[0]; !bytes.Equal(got, want) { + t.Errorf("message %d: %s", idx, linediff(nfdump(got), nfdump(want))) + } + want = want[1:] + } + return req, nil + }, + } + + c.FlushRuleset() + + filter := c.AddTable(&nftables.Table{ + Family: nftables.TableFamilyIPv4, + Name: "filter", + }) + + set := &nftables.Set{ + Anonymous: true, + Constant: true, + Table: filter, + KeyType: nftables.TypeInetService, + } + + if err := c.AddSet(set, []nftables.SetElement{ + {Key: binaryutil.BigEndian.PutUint16(69)}, + {Key: binaryutil.BigEndian.PutUint16(1163)}, + }); err != nil { + t.Errorf("c.AddSet() failed: %v", err) + } + + c.AddRule(&nftables.Rule{ + Table: filter, + Chain: &nftables.Chain{Name: "forward", Type: nftables.ChainTypeFilter}, + Exprs: []expr.Any{ + // [ meta load l4proto => reg 1 ] + &expr.Meta{Key: expr.MetaKeyL4PROTO, Register: 1}, + // [ cmp eq reg 1 0x00000006 ] + &expr.Cmp{ + Op: expr.CmpOpEq, + Register: 1, + Data: []byte{unix.IPPROTO_TCP}, + }, + + // [ payload load 2b @ transport header + 2 => reg 1 ] + &expr.Payload{ + DestRegister: 1, + Base: expr.PayloadBaseTransportHeader, + Offset: 2, + Len: 2, + }, + // [ lookup reg 1 set __set%d ] + &expr.Lookup{ + SourceRegister: 1, + SetName: set.Name, + SetID: set.ID, + }, + // [ immediate reg 0 drop ] + &expr.Verdict{ + Kind: expr.VerdictDrop, + }, + }, + }) + + if err := c.Flush(); err != nil { + t.Fatal(err) + } +} + +func TestCreateUseNamedSet(t *testing.T) { + // Create a new network namespace to test these operations, + // and tear down the namespace at test completion. 
+ c, newNS := openSystemNFTConn(t) + defer cleanupSystemNFTConn(t, newNS) + // Clear all rules at the beginning + end of the test. + c.FlushRuleset() + defer c.FlushRuleset() + + filter := c.AddTable(&nftables.Table{ + Family: nftables.TableFamilyIPv4, + Name: "filter", + }) + + portSet := &nftables.Set{ + Table: filter, + Name: "kek", + KeyType: nftables.TypeInetService, + } + if err := c.AddSet(portSet, nil); err != nil { + t.Errorf("c.AddSet(portSet) failed: %v", err) + } + if err := c.SetAddElements(portSet, []nftables.SetElement{{Key: binaryutil.BigEndian.PutUint16(22)}}); err != nil { + t.Errorf("c.SetVal(portSet) failed: %v", err) + } + + ipSet := &nftables.Set{ + Table: filter, + Name: "IPs_4_dayz", + KeyType: nftables.TypeIPAddr, + } + if err := c.AddSet(ipSet, []nftables.SetElement{{Key: []byte(net.ParseIP("192.168.1.64").To4())}}); err != nil { + t.Errorf("c.AddSet(ipSet) failed: %v", err) + } + if err := c.SetAddElements(ipSet, []nftables.SetElement{{Key: []byte(net.ParseIP("192.168.1.42").To4())}}); err != nil { + t.Errorf("c.SetVal(ipSet) failed: %v", err) + } + if err := c.Flush(); err != nil { + t.Errorf("c.Flush() failed: %v", err) + } + + sets, err := c.GetSets(filter) + if err != nil { + t.Errorf("c.GetSets() failed: %v", err) + } + if len(sets) != 2 { + t.Fatalf("len(sets) = %d, want 2", len(sets)) + } + if sets[0].Name != "kek" { + t.Errorf("set[0].Name = %q, want kek", sets[0].Name) + } + if sets[1].Name != "IPs_4_dayz" { + t.Errorf("set[1].Name = %q, want IPs_4_dayz", sets[1].Name) + } +} + +func TestCreateDeleteNamedSet(t *testing.T) { + // Create a new network namespace to test these operations, + // and tear down the namespace at test completion. + c, newNS := openSystemNFTConn(t) + defer cleanupSystemNFTConn(t, newNS) + // Clear all rules at the beginning + end of the test. 
+ c.FlushRuleset() + defer c.FlushRuleset() + + filter := c.AddTable(&nftables.Table{ + Family: nftables.TableFamilyIPv4, + Name: "filter", + }) + + portSet := &nftables.Set{ + Table: filter, + Name: "kek", + KeyType: nftables.TypeInetService, + } + if err := c.AddSet(portSet, nil); err != nil { + t.Errorf("c.AddSet(portSet) failed: %v", err) + } + if err := c.Flush(); err != nil { + t.Errorf("c.Flush() failed: %v", err) + } + + c.DelSet(portSet) + + if err := c.Flush(); err != nil { + t.Errorf("Second c.Flush() failed: %v", err) + } + + sets, err := c.GetSets(filter) + if err != nil { + t.Errorf("c.GetSets() failed: %v", err) + } + if len(sets) != 0 { + t.Fatalf("len(sets) = %d, want 0", len(sets)) + } +} + +func TestDeleteElementNamedSet(t *testing.T) { + // Create a new network namespace to test these operations, + // and tear down the namespace at test completion. + c, newNS := openSystemNFTConn(t) + defer cleanupSystemNFTConn(t, newNS) + // Clear all rules at the beginning + end of the test. 
+ c.FlushRuleset() + defer c.FlushRuleset() + + filter := c.AddTable(&nftables.Table{ + Family: nftables.TableFamilyIPv4, + Name: "filter", + }) + + portSet := &nftables.Set{ + Table: filter, + Name: "kek", + KeyType: nftables.TypeInetService, + } + if err := c.AddSet(portSet, []nftables.SetElement{{Key: []byte{0, 22}}, {Key: []byte{0, 23}}}); err != nil { + t.Errorf("c.AddSet(portSet) failed: %v", err) + } + if err := c.Flush(); err != nil { + t.Errorf("c.Flush() failed: %v", err) + } + + c.SetDeleteElements(portSet, []nftables.SetElement{{Key: []byte{0, 23}}}) + + if err := c.Flush(); err != nil { + t.Errorf("Second c.Flush() failed: %v", err) + } + + elems, err := c.GetSetElements(portSet) + if err != nil { + t.Errorf("c.GetSets() failed: %v", err) + } + if len(elems) != 1 { + t.Fatalf("len(elems) = %d, want 1", len(elems)) + } + if !bytes.Equal(elems[0].Key, []byte{0, 22}) { + t.Errorf("elems[0].Key = %v, want 22", elems[0].Key) + } +} diff --git a/set.go b/set.go new file mode 100644 index 0000000..5fd079f --- /dev/null +++ b/set.go @@ -0,0 +1,414 @@ +// Copyright 2018 Google LLC. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package nftables + +import ( + "encoding/binary" + "errors" + "fmt" + + "github.com/google/nftables/binaryutil" + "github.com/mdlayher/netlink" + "golang.org/x/sys/unix" +) + +var allocSetID uint32 + +// SetDatatype represents a datatype declared by nft. 
+type SetDatatype struct { + Name string + Bytes uint32 + + // nftMagic represents the magic value that nft uses for + // certain types (e.g. IP addresses). We populate SET_KEY_TYPE + // identically, so `nft list ...` commands produce correct output. + nftMagic uint32 +} + +// NFT datatypes. See: https://git.netfilter.org/nftables/tree/src/datatype.c +var ( + TypeInvalid = SetDatatype{Name: "invalid", nftMagic: 1} + TypeIPAddr = SetDatatype{Name: "ipv4_addr", Bytes: 4, nftMagic: 7} + TypeIP6Addr = SetDatatype{Name: "ipv6_addr", Bytes: 16, nftMagic: 8} + TypeEtherAddr = SetDatatype{Name: "ether_addr", Bytes: 6, nftMagic: 9} + TypeInetProto = SetDatatype{Name: "inet_proto", Bytes: 1, nftMagic: 12} + TypeInetService = SetDatatype{Name: "inet_service", Bytes: 2, nftMagic: 13} + + nftDatatypes = []SetDatatype{ + TypeIPAddr, + TypeIP6Addr, + TypeEtherAddr, + TypeInetProto, + TypeInetService, + } +) + +// Set represents an nftables set. Anonymous sets are only valid within the +// context of a single batch. +type Set struct { + Table *Table + ID uint32 + Name string + Anonymous bool + Constant bool + + KeyType SetDatatype + DataLen int +} + +// SetElement represents a data point within a set. +type SetElement struct { + Key []byte + Val []byte +} + +// SetAddElements applies data points to an nftables set. 
+func (cc *Conn) SetAddElements(s *Set, vals []SetElement) error { + if s.Anonymous { + return errors.New("anonymous sets cannot be updated") + } + + elements, err := s.makeElemList(vals) + if err != nil { + return err + } + cc.messages = append(cc.messages, netlink.Message{ + Header: netlink.Header{ + Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSETELEM), + Flags: netlink.HeaderFlagsRequest | netlink.HeaderFlagsAcknowledge | netlink.HeaderFlagsCreate, + }, + Data: append(extraHeader(unix.NFTA_SET_NAME, 0), cc.marshalAttr(elements)...), + }) + + return nil +} + +func (s *Set) makeElemList(vals []SetElement) ([]netlink.Attribute, error) { + var elements []netlink.Attribute + for i, v := range vals { + encodedKey, err := netlink.MarshalAttributes([]netlink.Attribute{{Type: unix.NFTA_DATA_VALUE, Data: v.Key}}) + if err != nil { + return nil, fmt.Errorf("marshal key %d: %v", i, err) + } + item := []netlink.Attribute{{Type: unix.NFTA_SET_ELEM_KEY | unix.NLA_F_NESTED, Data: encodedKey}} + + if len(v.Val) > 0 { + encodedVal, err := netlink.MarshalAttributes([]netlink.Attribute{{Type: unix.NFTA_DATA_VALUE, Data: v.Val}}) + if err != nil { + return nil, fmt.Errorf("marshal item %d: %v", i, err) + } + item = append(item, netlink.Attribute{Type: unix.NFTA_SET_ELEM_DATA | unix.NLA_F_NESTED, Data: encodedVal}) + } + + encodedItem, err := netlink.MarshalAttributes(item) + if err != nil { + return nil, fmt.Errorf("marshal item %d: %v", i, err) + } + elements = append(elements, netlink.Attribute{Type: uint16(i+1) | unix.NLA_F_NESTED, Data: encodedItem}) + } + + encodedElem, err := netlink.MarshalAttributes(elements) + if err != nil { + return nil, fmt.Errorf("marshal elements: %v", err) + } + + return []netlink.Attribute{ + {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")}, + {Type: unix.NFTA_SET_KEY_TYPE, Data: binaryutil.BigEndian.PutUint32(unix.NFTA_DATA_VALUE)}, + {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")}, + {Type: 
unix.NFTA_SET_ELEM_LIST_ELEMENTS | unix.NLA_F_NESTED, Data: encodedElem}, + }, nil +} + +// AddSet adds the specified Set. +func (cc *Conn) AddSet(s *Set, vals []SetElement) error { + // Based on nft implementation & linux source. + // Link: https://github.com/torvalds/linux/blob/49a57857aeea06ca831043acbb0fa5e0f50602fd/net/netfilter/nf_tables_api.c#L3395 + // Another reference: https://git.netfilter.org/nftables/tree/src + + if s.Anonymous && !s.Constant { + return errors.New("anonymous structs must be constant") + } + + if s.ID == 0 { + allocSetID++ + s.ID = allocSetID + if s.Anonymous { + s.Name = "__set%d" + } + } + + setData := cc.marshalAttr([]netlink.Attribute{ + {Type: unix.NFTA_SET_DESC_SIZE, Data: binaryutil.BigEndian.PutUint32(uint32(s.KeyType.Bytes))}, + }) + + var flags uint32 + if s.Anonymous { + flags |= unix.NFT_SET_ANONYMOUS + } + if s.Constant { + flags |= unix.NFT_SET_CONSTANT + } + if s.DataLen > 0 { + flags |= unix.NFT_SET_MAP + } + + tableInfo := []netlink.Attribute{ + {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")}, + {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")}, + {Type: unix.NFTA_SET_ELEM_FLAGS, Data: binaryutil.BigEndian.PutUint32(flags)}, + {Type: unix.NFTA_SET_KEY_TYPE, Data: binaryutil.BigEndian.PutUint32(s.KeyType.nftMagic)}, + {Type: unix.NFTA_SET_KEY_LEN, Data: binaryutil.BigEndian.PutUint32(s.KeyType.Bytes)}, + {Type: unix.NFTA_SET_ID, Data: binaryutil.BigEndian.PutUint32(s.ID)}, + } + if s.DataLen > 0 { + tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NFTA_SET_DATA_TYPE, Data: binaryutil.BigEndian.PutUint32(unix.NFT_DATA_VALUE)}, + netlink.Attribute{Type: unix.NFTA_SET_DATA_LEN, Data: binaryutil.BigEndian.PutUint32(uint32(s.DataLen))}) + } + if s.Anonymous || s.Constant { + tableInfo = append(tableInfo, netlink.Attribute{Type: unix.NLA_F_NESTED | unix.NFTA_SET_DESC, Data: setData}, + // Semantically useless - kept for binary compatibility with nft + netlink.Attribute{Type: 
unix.NFTA_SET_USERDATA, Data: []byte("\x00\x04\x02\x00\x00\x00")}) + } + + cc.messages = append(cc.messages, netlink.Message{ + Header: netlink.Header{ + Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSET), + Flags: netlink.HeaderFlagsRequest | netlink.HeaderFlagsAcknowledge | netlink.HeaderFlagsCreate, + }, + Data: append(extraHeader(unix.NFTA_SET_NAME, 0), cc.marshalAttr(tableInfo)...), + }) + + // Set the values of the set if initial values were provided. + if len(vals) > 0 { + hdrType := unix.NFT_MSG_NEWSETELEM + if s.Anonymous { + // Anonymous sets can only be populated within NEWSET. + hdrType = unix.NFT_MSG_NEWSET + } + elements, err := s.makeElemList(vals) + if err != nil { + return err + } + cc.messages = append(cc.messages, netlink.Message{ + Header: netlink.Header{ + Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | hdrType), + Flags: netlink.HeaderFlagsRequest | netlink.HeaderFlagsAcknowledge | netlink.HeaderFlagsCreate, + }, + Data: append(extraHeader(unix.NFTA_SET_NAME, 0), cc.marshalAttr(elements)...), + }) + } + + return nil +} + +// DelSet deletes a specific set, along with all elements it contains. +func (cc *Conn) DelSet(s *Set) { + data := cc.marshalAttr([]netlink.Attribute{ + {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")}, + {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")}, + }) + cc.messages = append(cc.messages, netlink.Message{ + Header: netlink.Header{ + Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_DELSET), + Flags: netlink.HeaderFlagsRequest | netlink.HeaderFlagsAcknowledge, + }, + Data: append(extraHeader(uint8(unix.NFTA_SET_NAME), 0), data...), + }) +} + +// SetDeleteElements deletes data points from an nftables set. 
+func (cc *Conn) SetDeleteElements(s *Set, vals []SetElement) error { + if s.Anonymous { + return errors.New("anonymous sets cannot be updated") + } + + elements, err := s.makeElemList(vals) + if err != nil { + return err + } + cc.messages = append(cc.messages, netlink.Message{ + Header: netlink.Header{ + Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_DELSETELEM), + Flags: netlink.HeaderFlagsRequest | netlink.HeaderFlagsAcknowledge | netlink.HeaderFlagsCreate, + }, + Data: append(extraHeader(unix.NFTA_SET_NAME, 0), cc.marshalAttr(elements)...), + }) + + return nil +} + +var setHeaderType = netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSET) + +func setsFromMsg(msg netlink.Message) (*Set, error) { + if got, want := msg.Header.Type, setHeaderType; got != want { + return nil, fmt.Errorf("unexpected header type: got %v, want %v", got, want) + } + ad, err := netlink.NewAttributeDecoder(msg.Data[4:]) + if err != nil { + return nil, err + } + ad.ByteOrder = binary.BigEndian + + var set Set + for ad.Next() { + switch ad.Type() { + case unix.NFTA_SET_NAME: + set.Name = ad.String() + case unix.NFTA_SET_ID: + set.ID = binary.BigEndian.Uint32(ad.Bytes()) + case unix.NFTA_SET_FLAGS: + flags := ad.Uint32() + set.Constant = (flags & unix.NFT_SET_CONSTANT) != 0 + set.Anonymous = (flags & unix.NFT_SET_ANONYMOUS) != 0 + case unix.NFTA_SET_KEY_TYPE: + nftMagic := ad.Uint32() + for _, dt := range nftDatatypes { + if nftMagic == dt.nftMagic { + set.KeyType = dt + break + } + } + if set.KeyType.nftMagic == 0 { + return nil, fmt.Errorf("could not determine datatype %x", nftMagic) + } + } + } + return &set, nil +} + +var elemHeaderType = netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_NEWSETELEM) + +func elementsFromMsg(msg netlink.Message) ([]SetElement, error) { + if got, want := msg.Header.Type, elemHeaderType; got != want { + return nil, fmt.Errorf("unexpected header type: got %v, want %v", got, want) + } + ad, err := 
netlink.NewAttributeDecoder(msg.Data[4:]) + if err != nil { + return nil, err + } + ad.ByteOrder = binary.BigEndian + + var elements []SetElement + for ad.Next() { + b := ad.Bytes() + if ad.Type() == unix.NFTA_SET_ELEM_LIST_ELEMENTS && len(b) > 8 { + ad, err := netlink.NewAttributeDecoder(b[8:]) + if err != nil { + return nil, err + } + ad.ByteOrder = binary.BigEndian + + var elem SetElement + for ad.Next() { + switch ad.Type() { + case unix.NFTA_SET_ELEM_KEY: + elem.Key = ad.Bytes() + case unix.NFTA_SET_ELEM_DATA: + elem.Val = ad.Bytes() + } + } + elements = append(elements, elem) + } + } + return elements, nil +} + +// GetSets returns the sets in the specified table. +func (cc *Conn) GetSets(t *Table) ([]*Set, error) { + conn, err := cc.dialNetlink() + if err != nil { + return nil, err + } + defer conn.Close() + + data, err := netlink.MarshalAttributes([]netlink.Attribute{ + {Type: unix.NFTA_SET_TABLE, Data: []byte(t.Name + "\x00")}, + }) + if err != nil { + return nil, err + } + + message := netlink.Message{ + Header: netlink.Header{ + Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_GETSET), + Flags: netlink.HeaderFlagsRequest | netlink.HeaderFlagsAcknowledge | netlink.HeaderFlagsDump, + }, + Data: append(extraHeader(uint8(t.Family), 0), data...), + } + + if _, err := conn.SendMessages([]netlink.Message{message}); err != nil { + return nil, fmt.Errorf("SendMessages: %v", err) + } + + reply, err := conn.Receive() + if err != nil { + return nil, fmt.Errorf("Receive: %v", err) + } + var sets []*Set + for _, msg := range reply { + s, err := setsFromMsg(msg) + if err != nil { + return nil, err + } + sets = append(sets, s) + } + + return sets, nil +} + +// GetSetElements returns the elements in the specified set. 
+func (cc *Conn) GetSetElements(s *Set) ([]SetElement, error) { + conn, err := cc.dialNetlink() + if err != nil { + return nil, err + } + defer conn.Close() + + data, err := netlink.MarshalAttributes([]netlink.Attribute{ + {Type: unix.NFTA_SET_TABLE, Data: []byte(s.Table.Name + "\x00")}, + {Type: unix.NFTA_SET_NAME, Data: []byte(s.Name + "\x00")}, + }) + if err != nil { + return nil, err + } + + message := netlink.Message{ + Header: netlink.Header{ + Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_GETSETELEM), + Flags: netlink.HeaderFlagsRequest | netlink.HeaderFlagsAcknowledge | netlink.HeaderFlagsDump, + }, + Data: append(extraHeader(uint8(s.Table.Family), 0), data...), + } + + if _, err := conn.SendMessages([]netlink.Message{message}); err != nil { + return nil, fmt.Errorf("SendMessages: %v", err) + } + + reply, err := conn.Receive() + if err != nil { + return nil, fmt.Errorf("Receive: %v", err) + } + var elems []SetElement + for _, msg := range reply { + s, err := elementsFromMsg(msg) + if err != nil { + return nil, err + } + elems = append(elems, s...) + } + + return elems, nil +}