371 lines
10 KiB
Go
371 lines
10 KiB
Go
// Copyright 2018 Google LLC. All Rights Reserved.
|
|
//
|
|
// Licensed under the Apache License, Version 2.0 (the "License");
|
|
// you may not use this file except in compliance with the License.
|
|
// You may obtain a copy of the License at
|
|
//
|
|
// http://www.apache.org/licenses/LICENSE-2.0
|
|
//
|
|
// Unless required by applicable law or agreed to in writing, software
|
|
// distributed under the License is distributed on an "AS IS" BASIS,
|
|
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
|
// See the License for the specific language governing permissions and
|
|
// limitations under the License.
|
|
|
|
package nftables
|
|
|
|
import (
|
|
"errors"
|
|
"fmt"
|
|
"os"
|
|
"sync"
|
|
|
|
"github.com/google/nftables/binaryutil"
|
|
"github.com/google/nftables/expr"
|
|
"github.com/mdlayher/netlink"
|
|
"github.com/mdlayher/netlink/nltest"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// A Conn represents a netlink connection of the nftables family.
|
|
//
|
|
// All methods return their input, so that variables can be defined from string
|
|
// literals when desired.
|
|
//
|
|
// Commands are buffered. Flush sends all buffered commands in a single batch.
|
|
type Conn struct {
|
|
TestDial nltest.Func // for testing only; passed to nltest.Dial
|
|
NetNS int // fd referencing the network namespace netlink will interact with.
|
|
|
|
lasting bool // establish a lasting connection to be used across multiple netlink operations.
|
|
mu sync.Mutex // protects the following state
|
|
messages []netlink.Message
|
|
err error
|
|
nlconn *netlink.Conn // netlink socket using NETLINK_NETFILTER protocol.
|
|
sockOptions []SockOption
|
|
}
|
|
|
|
// ConnOption is an option to change the behavior of the nftables Conn returned by Open.
|
|
type ConnOption func(*Conn)
|
|
|
|
// SockOption is an option to change the behavior of the netlink socket used by the nftables Conn.
|
|
type SockOption func(*netlink.Conn) error
|
|
|
|
// New returns a netlink connection for querying and modifying nftables. Some
|
|
// aspects of the new netlink connection can be configured using the options
|
|
// WithNetNSFd, WithTestDial, and AsLasting.
|
|
//
|
|
// A lasting netlink connection should be closed by calling CloseLasting() to
|
|
// close the underlying lasting netlink connection, cancelling all pending
|
|
// operations using this connection.
|
|
func New(opts ...ConnOption) (*Conn, error) {
|
|
cc := &Conn{}
|
|
for _, opt := range opts {
|
|
opt(cc)
|
|
}
|
|
|
|
if !cc.lasting {
|
|
return cc, nil
|
|
}
|
|
|
|
nlconn, err := cc.dialNetlink()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
cc.nlconn = nlconn
|
|
return cc, nil
|
|
}
|
|
|
|
// AsLasting creates the new netlink connection as a lasting connection that is
|
|
// reused across multiple netlink operations, instead of opening and closing the
|
|
// underlying netlink connection only for the duration of a single netlink
|
|
// operation.
|
|
func AsLasting() ConnOption {
|
|
return func(cc *Conn) {
|
|
// We cannot create the underlying connection yet, as we are called
|
|
// anywhere in the option processing chain and there might be later
|
|
// options still modifying connection behavior.
|
|
cc.lasting = true
|
|
}
|
|
}
|
|
|
|
// WithNetNSFd sets the network namespace to create a new netlink connection to:
|
|
// the fd must reference a network namespace.
|
|
func WithNetNSFd(fd int) ConnOption {
|
|
return func(cc *Conn) {
|
|
cc.NetNS = fd
|
|
}
|
|
}
|
|
|
|
// WithTestDial sets the specified nltest.Func when creating a new netlink
|
|
// connection.
|
|
func WithTestDial(f nltest.Func) ConnOption {
|
|
return func(cc *Conn) {
|
|
cc.TestDial = f
|
|
}
|
|
}
|
|
|
|
// WithSockOptions sets the specified socket options when creating a new netlink
|
|
// connection.
|
|
func WithSockOptions(opts ...SockOption) ConnOption {
|
|
return func(cc *Conn) {
|
|
cc.sockOptions = append(cc.sockOptions, opts...)
|
|
}
|
|
}
|
|
|
|
// netlinkCloser is returned by netlinkConn(UnderLock) and must be called after
|
|
// being done with the returned netlink connection in order to properly close
|
|
// this connection, if necessary.
|
|
type netlinkCloser func() error
|
|
|
|
// netlinkConn returns a netlink connection together with a netlinkCloser that
|
|
// later must be called by the caller when it doesn't need the returned netlink
|
|
// connection anymore. The netlinkCloser will close the netlink connection when
|
|
// necessary. If New has been told to create a lasting connection, then this
|
|
// lasting netlink connection will be returned, otherwise a new "transient"
|
|
// netlink connection will be opened and returned instead. netlinkConn must not
|
|
// be called while the Conn.mu lock is currently helt (this will cause a
|
|
// deadlock). Use netlinkConnUnderLock instead in such situations.
|
|
func (cc *Conn) netlinkConn() (*netlink.Conn, netlinkCloser, error) {
|
|
cc.mu.Lock()
|
|
defer cc.mu.Unlock()
|
|
return cc.netlinkConnUnderLock()
|
|
}
|
|
|
|
// netlinkConnUnderLock works like netlinkConn but must be called while holding
|
|
// the Conn.mu lock.
|
|
func (cc *Conn) netlinkConnUnderLock() (*netlink.Conn, netlinkCloser, error) {
|
|
if cc.nlconn != nil {
|
|
return cc.nlconn, func() error { return nil }, nil
|
|
}
|
|
nlconn, err := cc.dialNetlink()
|
|
if err != nil {
|
|
return nil, nil, err
|
|
}
|
|
return nlconn, func() error { return nlconn.Close() }, nil
|
|
}
|
|
|
|
func receiveAckAware(nlconn *netlink.Conn, sentMsgFlags netlink.HeaderFlags) ([]netlink.Message, error) {
|
|
if nlconn == nil {
|
|
return nil, errors.New("netlink conn is not initialized")
|
|
}
|
|
|
|
// first receive will be the message that we expect
|
|
reply, err := nlconn.Receive()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if (sentMsgFlags & netlink.Acknowledge) == 0 {
|
|
// we did not request an ack
|
|
return reply, nil
|
|
}
|
|
|
|
if (sentMsgFlags & netlink.Dump) == netlink.Dump {
|
|
// sent message has Dump flag set, there will be no acks
|
|
// https://github.com/torvalds/linux/blob/7e062cda7d90543ac8c7700fc7c5527d0c0f22ad/net/netlink/af_netlink.c#L2387-L2390
|
|
return reply, nil
|
|
}
|
|
|
|
if len(reply) != 0 {
|
|
last := reply[len(reply)-1]
|
|
for re := last.Header.Type; (re&netlink.Overrun) == netlink.Overrun && (re&netlink.Done) != netlink.Done; re = last.Header.Type {
|
|
// we are not finished, the message is overrun
|
|
r, err := nlconn.Receive()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
reply = append(reply, r...)
|
|
last = reply[len(reply)-1]
|
|
}
|
|
|
|
if last.Header.Type == netlink.Error && binaryutil.BigEndian.Uint32(last.Data[:4]) == 0 {
|
|
// we have already collected an ack
|
|
return reply, nil
|
|
}
|
|
}
|
|
|
|
// Now we expect an ack
|
|
ack, err := nlconn.Receive()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
if len(ack) == 0 {
|
|
// received an empty ack?
|
|
return reply, nil
|
|
}
|
|
|
|
msg := ack[0]
|
|
if msg.Header.Type != netlink.Error {
|
|
// acks should be delivered as NLMSG_ERROR
|
|
return nil, fmt.Errorf("expected header %v, but got %v", netlink.Error, msg.Header.Type)
|
|
}
|
|
|
|
if binaryutil.BigEndian.Uint32(msg.Data[:4]) != 0 {
|
|
// if errno field is not set to 0 (success), this is an error
|
|
return nil, fmt.Errorf("error delivered in message: %v", msg.Data)
|
|
}
|
|
|
|
return reply, nil
|
|
}
|
|
|
|
// CloseLasting closes the lasting netlink connection that has been opened using
|
|
// AsLasting option when creating this connection. If either no lasting netlink
|
|
// connection has been opened or the lasting connection is already in the
|
|
// process of closing or has been closed, CloseLasting will immediately return
|
|
// without any error.
|
|
//
|
|
// CloseLasting will terminate all pending netlink operations using the lasting
|
|
// connection.
|
|
//
|
|
// After closing a lasting connection, the connection will revert to using
|
|
// on-demand transient netlink connections when calling further netlink
|
|
// operations (such as GetTables).
|
|
func (cc *Conn) CloseLasting() error {
|
|
// Don't acquire the lock for the whole duration of the CloseLasting
|
|
// operation, but instead only so long as to make sure to only run the
|
|
// netlink socket close on the first time with a lasting netlink socket. As
|
|
// there is only the New() constructor, but no Open() method, it's
|
|
// impossible to reopen a lasting connection.
|
|
cc.mu.Lock()
|
|
nlconn := cc.nlconn
|
|
cc.nlconn = nil
|
|
cc.mu.Unlock()
|
|
if nlconn != nil {
|
|
return nlconn.Close()
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Flush sends all buffered commands in a single batch to nftables.
|
|
func (cc *Conn) Flush() error {
|
|
cc.mu.Lock()
|
|
defer func() {
|
|
cc.messages = nil
|
|
cc.mu.Unlock()
|
|
}()
|
|
if len(cc.messages) == 0 {
|
|
// Messages were already programmed, returning nil
|
|
return nil
|
|
}
|
|
if cc.err != nil {
|
|
return cc.err // serialization error
|
|
}
|
|
conn, closer, err := cc.netlinkConnUnderLock()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer func() { _ = closer() }()
|
|
|
|
if _, err := conn.SendMessages(batch(cc.messages)); err != nil {
|
|
return fmt.Errorf("SendMessages: %w", err)
|
|
}
|
|
|
|
var errs error
|
|
// Fetch the requested acknowledgement for each message we sent.
|
|
for _, msg := range cc.messages {
|
|
if _, err := receiveAckAware(conn, msg.Header.Flags); err != nil {
|
|
if errors.Is(err, os.ErrPermission) {
|
|
// Kernel will only send one permission error to user space.
|
|
return err
|
|
}
|
|
errs = errors.Join(errs, err)
|
|
}
|
|
}
|
|
|
|
if errs != nil {
|
|
return fmt.Errorf("conn.Receive: %w", errs)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// FlushRuleset flushes the entire ruleset. See also
|
|
// https://wiki.nftables.org/wiki-nftables/index.php/Operations_at_ruleset_level
|
|
func (cc *Conn) FlushRuleset() {
|
|
cc.mu.Lock()
|
|
defer cc.mu.Unlock()
|
|
cc.messages = append(cc.messages, netlink.Message{
|
|
Header: netlink.Header{
|
|
Type: netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | unix.NFT_MSG_DELTABLE),
|
|
Flags: netlink.Request | netlink.Acknowledge | netlink.Create,
|
|
},
|
|
Data: extraHeader(0, 0),
|
|
})
|
|
}
|
|
|
|
func (cc *Conn) dialNetlink() (*netlink.Conn, error) {
|
|
var (
|
|
conn *netlink.Conn
|
|
err error
|
|
)
|
|
|
|
if cc.TestDial != nil {
|
|
conn = nltest.Dial(cc.TestDial)
|
|
} else {
|
|
conn, err = netlink.Dial(unix.NETLINK_NETFILTER, &netlink.Config{NetNS: cc.NetNS})
|
|
}
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
for _, opt := range cc.sockOptions {
|
|
if err := opt(conn); err != nil {
|
|
return nil, err
|
|
}
|
|
}
|
|
|
|
return conn, nil
|
|
}
|
|
|
|
func (cc *Conn) setErr(err error) {
|
|
if cc.err != nil {
|
|
return
|
|
}
|
|
cc.err = err
|
|
}
|
|
|
|
func (cc *Conn) marshalAttr(attrs []netlink.Attribute) []byte {
|
|
b, err := netlink.MarshalAttributes(attrs)
|
|
if err != nil {
|
|
cc.setErr(err)
|
|
return nil
|
|
}
|
|
return b
|
|
}
|
|
|
|
func (cc *Conn) marshalExpr(fam byte, e expr.Any) []byte {
|
|
b, err := expr.Marshal(fam, e)
|
|
if err != nil {
|
|
cc.setErr(err)
|
|
return nil
|
|
}
|
|
return b
|
|
}
|
|
|
|
func batch(messages []netlink.Message) []netlink.Message {
|
|
batch := []netlink.Message{
|
|
{
|
|
Header: netlink.Header{
|
|
Type: netlink.HeaderType(unix.NFNL_MSG_BATCH_BEGIN),
|
|
Flags: netlink.Request,
|
|
},
|
|
Data: extraHeader(0, unix.NFNL_SUBSYS_NFTABLES),
|
|
},
|
|
}
|
|
|
|
batch = append(batch, messages...)
|
|
|
|
batch = append(batch, netlink.Message{
|
|
Header: netlink.Header{
|
|
Type: netlink.HeaderType(unix.NFNL_MSG_BATCH_END),
|
|
Flags: netlink.Request,
|
|
},
|
|
Data: extraHeader(0, unix.NFNL_SUBSYS_NFTABLES),
|
|
})
|
|
|
|
return batch
|
|
}
|