// Copyright 2018 Google LLC. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nftables
import (
"encoding/binary"
"fmt"
"github.com/google/nftables/binaryutil"
"github.com/mdlayher/netlink"
"golang.org/x/sys/unix"
)
// Netfilter message types for flowtable operations.
const (
	// not in ztypes_linux.go, added here
	// https://cs.opensource.google/go/x/sys/+/c6bc011c:unix/ztypes_linux.go;l=1870-1892
	NFT_MSG_NEWFLOWTABLE = 0x16 + iota // 0x16
	NFT_MSG_GETFLOWTABLE               // 0x17
	NFT_MSG_DELFLOWTABLE               // 0x18
)
// Top-level netlink attribute types of a flowtable message. Numbering starts
// at 1.
const (
	// not in ztypes_linux.go, added here
	// https://git.netfilter.org/libnftnl/tree/include/linux/netfilter/nf_tables.h?id=84d12cfacf8ddd857a09435f3d982ab6250d250c#n1634
	NFTA_FLOWTABLE_TABLE  = iota + 1 // 1
	NFTA_FLOWTABLE_NAME              // 2
	NFTA_FLOWTABLE_HOOK              // 3
	NFTA_FLOWTABLE_USE               // 4
	NFTA_FLOWTABLE_HANDLE            // 5
	NFTA_FLOWTABLE_PAD               // 6
	NFTA_FLOWTABLE_FLAGS             // 7
)
// Attribute types nested inside NFTA_FLOWTABLE_HOOK. Numbering starts at 1.
const (
	// not in ztypes_linux.go, added here
	// https://git.netfilter.org/libnftnl/tree/include/linux/netfilter/nf_tables.h?id=84d12cfacf8ddd857a09435f3d982ab6250d250c#n1657
	NFTA_FLOWTABLE_HOOK_NUM = iota + 1 // 1
	NFTA_FLOWTABLE_PRIORITY            // 2
	NFTA_FLOWTABLE_DEVS                // 3
)
// NFTA_DEVICE_NAME is the attribute type carrying a single device name inside
// a flowtable device list.
//
// not in ztypes_linux.go, added here, used for flowtable device name specification
// https://git.netfilter.org/libnftnl/tree/include/linux/netfilter/nf_tables.h?id=84d12cfacf8ddd857a09435f3d982ab6250d250c#n1709
const NFTA_DEVICE_NAME = 1
// FlowtableFlags is a bitmask of flowtable option flags.
type FlowtableFlags uint32

// Individual flowtable flag bits and the mask covering all of them.
const (
	// FlowtableFlagsHWOffload is bit 0 (value 1).
	FlowtableFlagsHWOffload FlowtableFlags = 1 << iota
	// FlowtableFlagsCounter is bit 1 (value 2).
	FlowtableFlagsCounter
	// FlowtableFlagsMask is the union of every flag defined above.
	FlowtableFlagsMask = FlowtableFlagsHWOffload | FlowtableFlagsCounter
)
// FlowtableHook is the hook number a flowtable is attached to.
type FlowtableHook uint32

// FlowtableHookRef returns a pointer to a fresh copy of h, which is handy for
// filling pointer-typed fields from a constant.
func FlowtableHookRef(h FlowtableHook) *FlowtableHook {
	ref := new(FlowtableHook)
	*ref = h
	return ref
}
var (
// Only ingress is supported
// https://github.com/torvalds/linux/blob/b72018ab8236c3ae427068adeb94bdd3f20454ec/net/netfilter/nf_tables_api.c#L7378-L7379
FlowtableHookIngress * FlowtableHook = FlowtableHookRef ( unix . NF_NETDEV_INGRESS )
)
// FlowtablePriority is the signed priority of a flowtable hook.
type FlowtablePriority int32

// FlowtablePriorityRef returns a pointer to a fresh copy of p, which is handy
// for filling pointer-typed fields from a constant.
func FlowtablePriorityRef(p FlowtablePriority) *FlowtablePriority {
	ref := new(FlowtablePriority)
	*ref = p
	return ref
}
var (
// As per man page:
// The priority can be a signed integer or filter which stands for 0. Addition and subtraction can be used to set relative priority, e.g. filter + 5 equals to 5.
// https://git.netfilter.org/nftables/tree/doc/nft.txt?id=8c600a843b7c0c1cc275ecc0603bd1fc57773e98#n712
FlowtablePriorityFilter * FlowtablePriority = FlowtablePriorityRef ( 0 )
)
// Flowtable describes a flowtable belonging to a table.
type Flowtable struct {
	// Table is the table the flowtable belongs to; its Name and Family are
	// used when building add/delete messages.
	Table *Table
	// Name is the flowtable's name.
	Name string
	// Hooknum is the hook to attach to. AddFlowtable replaces a nil value
	// with FlowtableHookIngress.
	Hooknum *FlowtableHook
	// Priority is the hook priority. AddFlowtable replaces a nil value with
	// FlowtablePriorityFilter.
	Priority *FlowtablePriority
	// Devices lists the device names attached to the flowtable hook.
	Devices []string
	// Use is filled from the NFTA_FLOWTABLE_USE attribute when listing.
	Use uint32
	// Bitmask flags, can be HW_OFFLOAD or COUNTER
	// https://git.netfilter.org/libnftnl/tree/include/linux/netfilter/nf_tables.h?id=84d12cfacf8ddd857a09435f3d982ab6250d250c#n1621
	Flags FlowtableFlags
	// Handle is filled from the NFTA_FLOWTABLE_HANDLE attribute when listing.
	Handle uint64
}
func ( cc * Conn ) AddFlowtable ( f * Flowtable ) * Flowtable {
cc . mu . Lock ( )
defer cc . mu . Unlock ( )
data := cc . marshalAttr ( [ ] netlink . Attribute {
{ Type : NFTA_FLOWTABLE_TABLE , Data : [ ] byte ( f . Table . Name ) } ,
{ Type : NFTA_FLOWTABLE_NAME , Data : [ ] byte ( f . Name ) } ,
{ Type : NFTA_FLOWTABLE_FLAGS , Data : binaryutil . BigEndian . PutUint32 ( uint32 ( f . Flags ) ) } ,
} )
if f . Hooknum == nil {
f . Hooknum = FlowtableHookIngress
}
if f . Priority == nil {
f . Priority = FlowtablePriorityFilter
}
hookAttr := [ ] netlink . Attribute {
{ Type : NFTA_FLOWTABLE_HOOK_NUM , Data : binaryutil . BigEndian . PutUint32 ( uint32 ( * f . Hooknum ) ) } ,
{ Type : NFTA_FLOWTABLE_PRIORITY , Data : binaryutil . BigEndian . PutUint32 ( uint32 ( * f . Priority ) ) } ,
}
if len ( f . Devices ) > 0 {
devs := make ( [ ] netlink . Attribute , len ( f . Devices ) )
for i , d := range f . Devices {
devs [ i ] = netlink . Attribute { Type : NFTA_DEVICE_NAME , Data : [ ] byte ( d ) }
}
hookAttr = append ( hookAttr , netlink . Attribute {
Type : unix . NLA_F_NESTED | NFTA_FLOWTABLE_DEVS ,
Data : cc . marshalAttr ( devs ) ,
} )
}
data = append ( data , cc . marshalAttr ( [ ] netlink . Attribute {
{ Type : unix . NLA_F_NESTED | NFTA_FLOWTABLE_HOOK , Data : cc . marshalAttr ( hookAttr ) } ,
} ) ... )
Set rule handle during flush
This change makes it possible to delete rules after inserting them,
without needing to query the rules first. Rules can be deleted both
before and after they are flushed. Additionally, this allows positioning
a new rule next to an existing rule, both before and after the existing
rule is flushed.
There are two ways to refer to a rule: Either by ID or by handle. The ID
is assigned by userspace, and is only valid within a transaction, so it
can only be used before the flush. The handle is assigned by the kernel
when the transaction is committed, and can thus only be used after the
flush. We thus need to set an ID on each newly created rule, and
retrieve the handle of the rule during the flush.
There was an existing mechanism to allocate IDs for sets, but this was
using a global counter without any synchronization to prevent data
races. I replaced this by a new mechanism which uses a connection-scoped
counter.
I implemented a new mechanism for retrieving replies in Flush, and
handling these replies by adding a callback to netlink messages. There
was some existing code to handle "overrun", which I deleted, because it
was nonsensical and just worked by accident. NLMSG_OVERRUN is in fact
not a flag, but a complete message type, so the (re&netlink.Overrun)
masking makes no sense. Even better, NLMSG_OVERRUN is never actually
used by Linux. What this code was actually doing was skipping over the
NFT_MSG_NEWRULE replies, and possibly a NFT_MSG_NEWGEN reply.
I had to update all existing tests which compared generated netlink
messages against a reference, by inserting the newly added ID attribute.
We also need to generate replies for the NFT_MSG_NEWRULE messages with a
handle added.
2025-02-20 13:12:30 -06:00
cc . messages = append ( cc . messages , netlinkMessage {
2022-11-01 01:48:00 -05:00
Header : netlink . Header {
Type : netlink . HeaderType ( ( unix . NFNL_SUBSYS_NFTABLES << 8 ) | NFT_MSG_NEWFLOWTABLE ) ,
Flags : netlink . Request | netlink . Acknowledge | netlink . Create ,
} ,
Data : append ( extraHeader ( uint8 ( f . Table . Family ) , 0 ) , data ... ) ,
} )
return f
}
func ( cc * Conn ) DelFlowtable ( f * Flowtable ) {
cc . mu . Lock ( )
defer cc . mu . Unlock ( )
data := cc . marshalAttr ( [ ] netlink . Attribute {
{ Type : NFTA_FLOWTABLE_TABLE , Data : [ ] byte ( f . Table . Name ) } ,
{ Type : NFTA_FLOWTABLE_NAME , Data : [ ] byte ( f . Name ) } ,
} )
Set rule handle during flush
This change makes it possible to delete rules after inserting them,
without needing to query the rules first. Rules can be deleted both
before and after they are flushed. Additionally, this allows positioning
a new rule next to an existing rule, both before and after the existing
rule is flushed.
There are two ways to refer to a rule: Either by ID or by handle. The ID
is assigned by userspace, and is only valid within a transaction, so it
can only be used before the flush. The handle is assigned by the kernel
when the transaction is committed, and can thus only be used after the
flush. We thus need to set an ID on each newly created rule, and
retrieve the handle of the rule during the flush.
There was an existing mechanism to allocate IDs for sets, but this was
using a global counter without any synchronization to prevent data
races. I replaced this by a new mechanism which uses a connection-scoped
counter.
I implemented a new mechanism for retrieving replies in Flush, and
handling these replies by adding a callback to netlink messages. There
was some existing code to handle "overrun", which I deleted, because it
was nonsensical and just worked by accident. NLMSG_OVERRUN is in fact
not a flag, but a complete message type, so the (re&netlink.Overrun)
masking makes no sense. Even better, NLMSG_OVERRUN is never actually
used by Linux. What this code was actually doing was skipping over the
NFT_MSG_NEWRULE replies, and possibly a NFT_MSG_NEWGEN reply.
I had to update all existing tests which compared generated netlink
messages against a reference, by inserting the newly added ID attribute.
We also need to generate replies for the NFT_MSG_NEWRULE messages with a
handle added.
2025-02-20 13:12:30 -06:00
cc . messages = append ( cc . messages , netlinkMessage {
2022-11-01 01:48:00 -05:00
Header : netlink . Header {
Type : netlink . HeaderType ( ( unix . NFNL_SUBSYS_NFTABLES << 8 ) | NFT_MSG_DELFLOWTABLE ) ,
Flags : netlink . Request | netlink . Acknowledge ,
} ,
Data : append ( extraHeader ( uint8 ( f . Table . Family ) , 0 ) , data ... ) ,
} )
}
// ListFlowtables fetches the flowtables of table t and decodes each reply
// message into a Flowtable whose Table field is set to t.
//
// The first fetch or decode error aborts the listing and is returned as-is.
// The result is a nil slice when there are no reply messages.
func (cc *Conn) ListFlowtables(t *Table) ([]*Flowtable, error) {
	msgs, err := cc.getFlowtables(t)
	if err != nil {
		return nil, err
	}

	var result []*Flowtable
	for i := range msgs {
		ft, decodeErr := ftsFromMsg(msgs[i])
		if decodeErr != nil {
			return nil, decodeErr
		}
		ft.Table = t
		result = append(result, ft)
	}

	return result, nil
}
func ( cc * Conn ) getFlowtables ( t * Table ) ( [ ] netlink . Message , error ) {
conn , closer , err := cc . netlinkConn ( )
if err != nil {
return nil , err
}
defer func ( ) { _ = closer ( ) } ( )
attrs := [ ] netlink . Attribute {
{ Type : NFTA_FLOWTABLE_TABLE , Data : [ ] byte ( t . Name + "\x00" ) } ,
}
data , err := netlink . MarshalAttributes ( attrs )
if err != nil {
return nil , err
}
message := netlink . Message {
Header : netlink . Header {
Type : netlink . HeaderType ( ( unix . NFNL_SUBSYS_NFTABLES << 8 ) | NFT_MSG_GETFLOWTABLE ) ,
Flags : netlink . Request | netlink . Acknowledge | netlink . Dump ,
} ,
Data : append ( extraHeader ( uint8 ( t . Family ) , 0 ) , data ... ) ,
}
if _ , err := conn . SendMessages ( [ ] netlink . Message { message } ) ; err != nil {
return nil , fmt . Errorf ( "SendMessages: %v" , err )
}
reply , err := receiveAckAware ( conn , message . Header . Flags )
if err != nil {
2024-08-11 03:00:48 -05:00
return nil , fmt . Errorf ( "receiveAckAware: %v" , err )
2022-11-01 01:48:00 -05:00
}
return reply , nil
}
// ftsFromMsg decodes a NFT_MSG_NEWFLOWTABLE netlink message into a Flowtable.
//
// Name, Use, Handle, Flags and the hook-related fields (Hooknum, Priority,
// Devices) are filled from the attributes present; Table is left unset. An
// error is returned when the header type is wrong, when the payload is too
// short to hold the 4-byte header that precedes the attributes, or when any
// attribute (including the nested hook attribute) fails to decode.
func ftsFromMsg(msg netlink.Message) (*Flowtable, error) {
	flowHeaderType := netlink.HeaderType((unix.NFNL_SUBSYS_NFTABLES << 8) | NFT_MSG_NEWFLOWTABLE)
	if got, want := msg.Header.Type, flowHeaderType; got != want {
		return nil, fmt.Errorf("unexpected header type: got %v, want %v", got, want)
	}

	// The attributes start after a 4-byte header; guard the slice expression
	// so a truncated message yields an error instead of a panic.
	const headerLen = 4
	if len(msg.Data) < headerLen {
		return nil, fmt.Errorf("flowtable message too short: got %d bytes, want at least %d", len(msg.Data), headerLen)
	}

	ad, err := netlink.NewAttributeDecoder(msg.Data[headerLen:])
	if err != nil {
		return nil, err
	}
	ad.ByteOrder = binary.BigEndian

	var ft Flowtable
	for ad.Next() {
		switch ad.Type() {
		case NFTA_FLOWTABLE_NAME:
			ft.Name = ad.String()
		case NFTA_FLOWTABLE_USE:
			ft.Use = ad.Uint32()
		case NFTA_FLOWTABLE_HANDLE:
			ft.Handle = ad.Uint64()
		case NFTA_FLOWTABLE_FLAGS:
			ft.Flags = FlowtableFlags(ad.Uint32())
		case NFTA_FLOWTABLE_HOOK:
			ad.Do(func(b []byte) error {
				var hookErr error
				ft.Hooknum, ft.Priority, ft.Devices, hookErr = ftsHookFromMsg(b)
				return hookErr
			})
		}
	}
	// Errors raised while iterating (including those returned from the Do
	// callback) are only visible through Err; surface them instead of
	// returning a partially decoded flowtable.
	if err := ad.Err(); err != nil {
		return nil, err
	}

	return &ft, nil
}
// ftsHookFromMsg decodes the payload of a nested NFTA_FLOWTABLE_HOOK attribute
// into hook number, priority and device names.
//
// On success the hook and priority pointers are always non-nil; they point at
// zero values when the corresponding attribute is absent. On any decode error
// (including one from the nested device list) all results are nil and the
// error is returned.
func ftsHookFromMsg(b []byte) (*FlowtableHook, *FlowtablePriority, []string, error) {
	ad, err := netlink.NewAttributeDecoder(b)
	if err != nil {
		return nil, nil, nil, err
	}
	ad.ByteOrder = binary.BigEndian

	var hooknum FlowtableHook
	var prio FlowtablePriority
	var devices []string

	for ad.Next() {
		switch ad.Type() {
		case NFTA_FLOWTABLE_HOOK_NUM:
			hooknum = FlowtableHook(ad.Uint32())
		case NFTA_FLOWTABLE_PRIORITY:
			// The 32-bit pattern is reinterpreted as a signed priority.
			prio = FlowtablePriority(ad.Uint32())
		case NFTA_FLOWTABLE_DEVS:
			ad.Do(func(b []byte) error {
				var devErr error
				devices, devErr = devsFromMsg(b)
				return devErr
			})
		}
	}
	// Decoder errors, including those returned by the Do callback, are only
	// reported through Err.
	if err := ad.Err(); err != nil {
		return nil, nil, nil, err
	}

	return &hooknum, &prio, devices, nil
}
// devsFromMsg decodes the payload of a nested NFTA_FLOWTABLE_DEVS attribute
// into the list of device names it carries.
//
// Attributes other than NFTA_DEVICE_NAME are skipped. The returned slice is
// non-nil (possibly empty) on success; a decode error yields a nil slice and
// the error.
func devsFromMsg(b []byte) ([]string, error) {
	ad, err := netlink.NewAttributeDecoder(b)
	if err != nil {
		return nil, err
	}
	ad.ByteOrder = binary.BigEndian

	// Deliberately non-nil so that an empty device list stays an empty slice.
	devs := make([]string, 0)
	for ad.Next() {
		if ad.Type() == NFTA_DEVICE_NAME {
			devs = append(devs, ad.String())
		}
	}
	// Malformed attributes are only reported through Err.
	if err := ad.Err(); err != nil {
		return nil, err
	}

	return devs, nil
}