mirror of
https://github.com/aljazceru/kata-containers.git
synced 2026-01-05 23:44:21 +01:00
The netlink dependency needs to be updated to pick up the tuntap link
logic. The update fixes a bug where a generic link was returned instead
of a tuntap link.
This also requires the golang/x/sys package to be updated
for the IFLA_* constants.
Commits for github.com/vishvananda/netlink
c8c507c fix: fix ip rule goto bug
db99c04 tuntap: Return TunTapLink instead of GenericLink
e993616 Fix unit test failure: TestNeighAddDelLLIPAddr
fb5fbae Mirred and connmark clobber their ActionAttrs
1187dc9 Fix tests
00009fb Add support for TC_ACT_CONNMARK
fafc1e7 support vlan protocol
fd97bf4 Add command to set devlink device switchdev mode
bcb80b2 Add devlink command by to get specific device name
f504738 Fix function comments based on best practices from Effective Go
e281812 Fix typos
adb577d Add support for IFLA_GSO_*
aa950f2 travis: run tests with Go 1.12.x
b64d7bc travis: specify go_import_path
b9cafe4 remove redundant type assertions in type switch
1e2e7ab Add Support for Virtual XFRM Interfaces
48a75e0 Fix Race Condition in TestXfrmMonitorExpire
e37f4b4 Avoid 64K allocation on the heap with each Receive
332a698 Add devlink commands for devlink device information
cb78b18 neigh_linux: Fix failure on deleted link neighs updates
2bc5004 Replace redundant copied u32 types with type aliases
093e80f Pass Ndmsg to NeighListExecute
78a3099 Make test suite more deterministic
2529893 genetlink: Add missing error check
91b013f code simplification
023a6da Make go vet happier
aa5b058 Simplify code
e137ed6 Replace nl.NewRtAttrChild with method on struct
3b1c596 Run TravisCI with Go 1.10 and 1.11
d741264 Reduce allocations
b48eed5 Add an API to rename rdma device name
02a3831 Adjust conntrack filters
d3a23fd Make AddChild more generic
1404979 Add support for hoplimit metric in routes
6d53654 Add support for neighbor subscription
531df7a Avoid serializing empty TCA_OPTIONS in qdisc messages
56b1bd2 fix: BRIDGE_FLAGS_* constants off-by-one
8aa85bf Add support for action and ifindex in XFRM policy
9eab419 Netlink: Fix Darwin build
2cbcf73 Add a test for Vlan filtering support for bridges.
0bbc55b Initial support for vlan aware bridges.
3ac69fd Add network namespace ID management.
d68dce4 Ingress qdisc add/del Test case
1006cf4 Implementation of HFSC
d85e18e Allow Tuntap non-persist, allow empty tuntap name
d77c86a protinfo: Check if object is nil
a06dabf Increase size of receive buffer
3e48e44 Revert "RTEXT_FILTER_VF doesn't always work with dump request, fixes #354"
028453c RTEXT_FILTER_VF doesn't always work with dump request, fixes #354
ee06b1d add vti6 support
b1cc70d fix prefixlen/local IP, incl. PtP addresses
7c0b594 Implemented String() for netem, fq and fq_codel in qdisc
769bb84 Adjust flags values
5f662e0 Add info about VFs on link
985ab95 Add support for link flag allmulticast
16769db Support LWTUNNEL_ENCAP_SEG6_LOCAL (including tests)
b7f0669 Add test to Add/Del IPv6 route.
55d3a80 Added tests for Gretap/Gretun devices
f07d9d5 Run both Inline/Encap mode in TestSEG6RouteAddDel
1970aef Add RDMA netlink socket for RDMA device information
dc00cf9 Add Hash to U32
23a36f2 Add Divisor to U32
85aa3b7 Add statistics to class attributes
aa0edbe Add support for setting InfininBand Node and Port GUID of a VF
41009d5 Read conntrack flow statistics
a2ad57a Add changelog file, initial release tagging
5236321 Use IFLA_* constants from x/sys/unix
25d2c79 Use IFF_MULTI_QUEUE from x/sys/unix to define TUNTAP_MULTI_QUEUE
d35d6b5 Clarify ESN bitmap length construction logic
a2af46a Add FQ Codel
465b5fe Add Fq Qdisc support
c27b7f7 Run gofmt -s -w on the project
5f5d5cd Add a 'ListExisting' option to get the existing entries in the route/addr/link tables as part of RouteSubscribeWithOptions, AddrSubscribeWithOptions, and LinkSubscribeWithOptions.
5a988e8 Support IPv6 GRE Tun and Tap
7291c36 addr_linux: Implement CacheInfo installation
422ffe6 addr_linux: Skip BROADCAST and LABEL for non-ipv4
1882fa9 Add Matchall filter
7b4c063 Update bpf_linux.go
ad19ca1 netlink: allow non linux builds to pass.
3ff4c21 Don't overwrite the XDP file descriptor with flags
d4235bf Eliminate cgo from netlink.
54ad9e3 Two new functions: LinkSetBondSlave and VethPeerIndex
f67b75e Properly tear down netns at the end of test
016ba6f Add support for managing source MACVLANs
6e7bb56 Run TestSocketGet in dedicated netns
a5d066d Fix LinkAdd for sit tunnel on 3.10 kernel
8bead6f Add requirements to conntrack tests
9ce265f Retrieve VLAN and VNI when listing neighbour
fad79cb Fix go build issue for fou code
Commits for golang/x/sys
88d2dcc unix: add IFLA_* constants for Linux 4.15
c1138c8 unix: update to Linux 4.15, glibc 2.27 and Go 1.10
37707fd unix: move gccgo redeclared *SyscallNoError functions to a separate file
8f27ce8 unix: fix cpuset size argument in sched_affinity syscall
3dbebcf unix: use SyscallNoError and RawSyscallNoError on Linux only
ff2a66f unix: fix godoc comment for clen
0346725 unix: add godoc for Sockaddr* types
90f0fdc plan9: add arm support
ef80224 unix: add sockaddr_l2 definitions
af9a212 unix: don't export padding fields on all platforms
af50095 unix: use ParseDirent from syscall
2c42eef unix: adjust replacement regex for removed struct fields for linux/s390x
fff93fa unix: add Statx on Linux
52ba35d unix: check error return of os.Symlink in tests on Linux
810d700 unix: match seek argument size to signature on linux/arm
b9cf5f9 unix: add cgroupstats type and constants
d38bf78 unix: restore gccgo support
2493af8 plan9: move Unsetenv into env_plan9.go
3ca7571 windows: move Unsetenv into env_windows.go
1792d66 unix: move Unsetenv into env_unix.go
dd9ec17 unix: fix build on Go 1.8
12d9d5b unix: add SchedGetaffinity and SchedSetaffinity on Linux
a3f2cbd unix: fix typo in unix/asm_linux_arm64.s made in 28a7276
28a7276 unix: add SyscallNoError and RawSyscallNoError on Linux
8380141 unix: simplify error handling in *listxattr on FreeBSD
df29b91 unix: add TestSelect for *BSD
801364e unix: add Select on Solaris
d818ba1 unix: remove syscall constants on Solaris
236baca unix: add timeout tests for Select and Pselect on Linux
571f7bb unix: simplify TestGetwd
d5840ad unix: add GetsockoptString for Darwin, *BSD and Solaris
Signed-off-by: Gabi Beyer <gabrielle.n.beyer@intel.com>
747 lines
18 KiB
Go
747 lines
18 KiB
Go
// Package nl has low level primitives for making Netlink calls.
|
|
package nl
|
|
|
|
import (
|
|
"bytes"
|
|
"encoding/binary"
|
|
"fmt"
|
|
"net"
|
|
"runtime"
|
|
"sync"
|
|
"sync/atomic"
|
|
"syscall"
|
|
"unsafe"
|
|
|
|
"github.com/vishvananda/netns"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
const (
|
|
// Family type definitions
|
|
FAMILY_ALL = unix.AF_UNSPEC
|
|
FAMILY_V4 = unix.AF_INET
|
|
FAMILY_V6 = unix.AF_INET6
|
|
FAMILY_MPLS = AF_MPLS
|
|
// Arbitrary set value (greater than default 4k) to allow receiving
|
|
// from kernel more verbose messages e.g. for statistics,
|
|
// tc rules or filters, or other more memory requiring data.
|
|
RECEIVE_BUFFER_SIZE = 65536
|
|
)
|
|
|
|
// SupportedNlFamilies contains the list of netlink families this netlink package supports
|
|
var SupportedNlFamilies = []int{unix.NETLINK_ROUTE, unix.NETLINK_XFRM, unix.NETLINK_NETFILTER}
|
|
|
|
var nextSeqNr uint32
|
|
|
|
// GetIPFamily returns the family type of a net.IP.
|
|
func GetIPFamily(ip net.IP) int {
|
|
if len(ip) <= net.IPv4len {
|
|
return FAMILY_V4
|
|
}
|
|
if ip.To4() != nil {
|
|
return FAMILY_V4
|
|
}
|
|
return FAMILY_V6
|
|
}
|
|
|
|
var (
	// nativeEndian caches the byte order detected by NativeEndian.
	nativeEndian binary.ByteOrder
	// nativeEndianOnce makes the detection run exactly once, even when
	// NativeEndian is first called from several goroutines at the same time.
	nativeEndianOnce sync.Once
)

// NativeEndian gets native endianness for the system.
//
// The byte order is detected on first use by inspecting the first byte in
// memory of a known 32-bit value, then cached. The function is safe for
// concurrent use.
func NativeEndian() binary.ByteOrder {
	nativeEndianOnce.Do(func() {
		var probe uint32 = 0x01020304
		// A big-endian host stores the most significant byte (0x01) first.
		if *(*byte)(unsafe.Pointer(&probe)) == 0x01 {
			nativeEndian = binary.BigEndian
		} else {
			nativeEndian = binary.LittleEndian
		}
	})
	return nativeEndian
}
|
|
|
|
// Byte swap a 16 bit value if we aren't big endian
|
|
func Swap16(i uint16) uint16 {
|
|
if NativeEndian() == binary.BigEndian {
|
|
return i
|
|
}
|
|
return (i&0xff00)>>8 | (i&0xff)<<8
|
|
}
|
|
|
|
// Byte swap a 32 bit value if aren't big endian
|
|
func Swap32(i uint32) uint32 {
|
|
if NativeEndian() == binary.BigEndian {
|
|
return i
|
|
}
|
|
return (i&0xff000000)>>24 | (i&0xff0000)>>8 | (i&0xff00)<<8 | (i&0xff)<<24
|
|
}
|
|
|
|
// NetlinkRequestData is implemented by every payload that can be attached to
// a NetlinkRequest or nested inside an RtAttr.
type NetlinkRequestData interface {
	// Serialize returns the payload encoded as bytes.
	Serialize() []byte
	// Len reports the size in bytes of the payload.
	Len() int
}
|
|
|
|
// IfInfomsg is related to links, but it is used for list requests as well
|
|
type IfInfomsg struct {
|
|
unix.IfInfomsg
|
|
}
|
|
|
|
// Create an IfInfomsg with family specified
|
|
func NewIfInfomsg(family int) *IfInfomsg {
|
|
return &IfInfomsg{
|
|
IfInfomsg: unix.IfInfomsg{
|
|
Family: uint8(family),
|
|
},
|
|
}
|
|
}
|
|
|
|
func DeserializeIfInfomsg(b []byte) *IfInfomsg {
|
|
return (*IfInfomsg)(unsafe.Pointer(&b[0:unix.SizeofIfInfomsg][0]))
|
|
}
|
|
|
|
func (msg *IfInfomsg) Serialize() []byte {
|
|
return (*(*[unix.SizeofIfInfomsg]byte)(unsafe.Pointer(msg)))[:]
|
|
}
|
|
|
|
func (msg *IfInfomsg) Len() int {
|
|
return unix.SizeofIfInfomsg
|
|
}
|
|
|
|
func (msg *IfInfomsg) EncapType() string {
|
|
switch msg.Type {
|
|
case 0:
|
|
return "generic"
|
|
case unix.ARPHRD_ETHER:
|
|
return "ether"
|
|
case unix.ARPHRD_EETHER:
|
|
return "eether"
|
|
case unix.ARPHRD_AX25:
|
|
return "ax25"
|
|
case unix.ARPHRD_PRONET:
|
|
return "pronet"
|
|
case unix.ARPHRD_CHAOS:
|
|
return "chaos"
|
|
case unix.ARPHRD_IEEE802:
|
|
return "ieee802"
|
|
case unix.ARPHRD_ARCNET:
|
|
return "arcnet"
|
|
case unix.ARPHRD_APPLETLK:
|
|
return "atalk"
|
|
case unix.ARPHRD_DLCI:
|
|
return "dlci"
|
|
case unix.ARPHRD_ATM:
|
|
return "atm"
|
|
case unix.ARPHRD_METRICOM:
|
|
return "metricom"
|
|
case unix.ARPHRD_IEEE1394:
|
|
return "ieee1394"
|
|
case unix.ARPHRD_INFINIBAND:
|
|
return "infiniband"
|
|
case unix.ARPHRD_SLIP:
|
|
return "slip"
|
|
case unix.ARPHRD_CSLIP:
|
|
return "cslip"
|
|
case unix.ARPHRD_SLIP6:
|
|
return "slip6"
|
|
case unix.ARPHRD_CSLIP6:
|
|
return "cslip6"
|
|
case unix.ARPHRD_RSRVD:
|
|
return "rsrvd"
|
|
case unix.ARPHRD_ADAPT:
|
|
return "adapt"
|
|
case unix.ARPHRD_ROSE:
|
|
return "rose"
|
|
case unix.ARPHRD_X25:
|
|
return "x25"
|
|
case unix.ARPHRD_HWX25:
|
|
return "hwx25"
|
|
case unix.ARPHRD_PPP:
|
|
return "ppp"
|
|
case unix.ARPHRD_HDLC:
|
|
return "hdlc"
|
|
case unix.ARPHRD_LAPB:
|
|
return "lapb"
|
|
case unix.ARPHRD_DDCMP:
|
|
return "ddcmp"
|
|
case unix.ARPHRD_RAWHDLC:
|
|
return "rawhdlc"
|
|
case unix.ARPHRD_TUNNEL:
|
|
return "ipip"
|
|
case unix.ARPHRD_TUNNEL6:
|
|
return "tunnel6"
|
|
case unix.ARPHRD_FRAD:
|
|
return "frad"
|
|
case unix.ARPHRD_SKIP:
|
|
return "skip"
|
|
case unix.ARPHRD_LOOPBACK:
|
|
return "loopback"
|
|
case unix.ARPHRD_LOCALTLK:
|
|
return "ltalk"
|
|
case unix.ARPHRD_FDDI:
|
|
return "fddi"
|
|
case unix.ARPHRD_BIF:
|
|
return "bif"
|
|
case unix.ARPHRD_SIT:
|
|
return "sit"
|
|
case unix.ARPHRD_IPDDP:
|
|
return "ip/ddp"
|
|
case unix.ARPHRD_IPGRE:
|
|
return "gre"
|
|
case unix.ARPHRD_PIMREG:
|
|
return "pimreg"
|
|
case unix.ARPHRD_HIPPI:
|
|
return "hippi"
|
|
case unix.ARPHRD_ASH:
|
|
return "ash"
|
|
case unix.ARPHRD_ECONET:
|
|
return "econet"
|
|
case unix.ARPHRD_IRDA:
|
|
return "irda"
|
|
case unix.ARPHRD_FCPP:
|
|
return "fcpp"
|
|
case unix.ARPHRD_FCAL:
|
|
return "fcal"
|
|
case unix.ARPHRD_FCPL:
|
|
return "fcpl"
|
|
case unix.ARPHRD_FCFABRIC:
|
|
return "fcfb0"
|
|
case unix.ARPHRD_FCFABRIC + 1:
|
|
return "fcfb1"
|
|
case unix.ARPHRD_FCFABRIC + 2:
|
|
return "fcfb2"
|
|
case unix.ARPHRD_FCFABRIC + 3:
|
|
return "fcfb3"
|
|
case unix.ARPHRD_FCFABRIC + 4:
|
|
return "fcfb4"
|
|
case unix.ARPHRD_FCFABRIC + 5:
|
|
return "fcfb5"
|
|
case unix.ARPHRD_FCFABRIC + 6:
|
|
return "fcfb6"
|
|
case unix.ARPHRD_FCFABRIC + 7:
|
|
return "fcfb7"
|
|
case unix.ARPHRD_FCFABRIC + 8:
|
|
return "fcfb8"
|
|
case unix.ARPHRD_FCFABRIC + 9:
|
|
return "fcfb9"
|
|
case unix.ARPHRD_FCFABRIC + 10:
|
|
return "fcfb10"
|
|
case unix.ARPHRD_FCFABRIC + 11:
|
|
return "fcfb11"
|
|
case unix.ARPHRD_FCFABRIC + 12:
|
|
return "fcfb12"
|
|
case unix.ARPHRD_IEEE802_TR:
|
|
return "tr"
|
|
case unix.ARPHRD_IEEE80211:
|
|
return "ieee802.11"
|
|
case unix.ARPHRD_IEEE80211_PRISM:
|
|
return "ieee802.11/prism"
|
|
case unix.ARPHRD_IEEE80211_RADIOTAP:
|
|
return "ieee802.11/radiotap"
|
|
case unix.ARPHRD_IEEE802154:
|
|
return "ieee802.15.4"
|
|
|
|
case 65534:
|
|
return "none"
|
|
case 65535:
|
|
return "void"
|
|
}
|
|
return fmt.Sprintf("unknown%d", msg.Type)
|
|
}
|
|
|
|
func rtaAlignOf(attrlen int) int {
|
|
return (attrlen + unix.RTA_ALIGNTO - 1) & ^(unix.RTA_ALIGNTO - 1)
|
|
}
|
|
|
|
func NewIfInfomsgChild(parent *RtAttr, family int) *IfInfomsg {
|
|
msg := NewIfInfomsg(family)
|
|
parent.children = append(parent.children, msg)
|
|
return msg
|
|
}
|
|
|
|
// Extend RtAttr to handle data and children
|
|
type RtAttr struct {
|
|
unix.RtAttr
|
|
Data []byte
|
|
children []NetlinkRequestData
|
|
}
|
|
|
|
// Create a new Extended RtAttr object
|
|
func NewRtAttr(attrType int, data []byte) *RtAttr {
|
|
return &RtAttr{
|
|
RtAttr: unix.RtAttr{
|
|
Type: uint16(attrType),
|
|
},
|
|
children: []NetlinkRequestData{},
|
|
Data: data,
|
|
}
|
|
}
|
|
|
|
// NewRtAttrChild adds an RtAttr as a child to the parent and returns the new attribute
|
|
//
|
|
// Deprecated: Use AddRtAttr() on the parent object
|
|
func NewRtAttrChild(parent *RtAttr, attrType int, data []byte) *RtAttr {
|
|
return parent.AddRtAttr(attrType, data)
|
|
}
|
|
|
|
// AddRtAttr adds an RtAttr as a child and returns the new attribute
|
|
func (a *RtAttr) AddRtAttr(attrType int, data []byte) *RtAttr {
|
|
attr := NewRtAttr(attrType, data)
|
|
a.children = append(a.children, attr)
|
|
return attr
|
|
}
|
|
|
|
// AddChild adds an existing NetlinkRequestData as a child.
|
|
func (a *RtAttr) AddChild(attr NetlinkRequestData) {
|
|
a.children = append(a.children, attr)
|
|
}
|
|
|
|
func (a *RtAttr) Len() int {
|
|
if len(a.children) == 0 {
|
|
return (unix.SizeofRtAttr + len(a.Data))
|
|
}
|
|
|
|
l := 0
|
|
for _, child := range a.children {
|
|
l += rtaAlignOf(child.Len())
|
|
}
|
|
l += unix.SizeofRtAttr
|
|
return rtaAlignOf(l + len(a.Data))
|
|
}
|
|
|
|
// Serialize the RtAttr into a byte array
|
|
// This can't just unsafe.cast because it must iterate through children.
|
|
func (a *RtAttr) Serialize() []byte {
|
|
native := NativeEndian()
|
|
|
|
length := a.Len()
|
|
buf := make([]byte, rtaAlignOf(length))
|
|
|
|
next := 4
|
|
if a.Data != nil {
|
|
copy(buf[next:], a.Data)
|
|
next += rtaAlignOf(len(a.Data))
|
|
}
|
|
if len(a.children) > 0 {
|
|
for _, child := range a.children {
|
|
childBuf := child.Serialize()
|
|
copy(buf[next:], childBuf)
|
|
next += rtaAlignOf(len(childBuf))
|
|
}
|
|
}
|
|
|
|
if l := uint16(length); l != 0 {
|
|
native.PutUint16(buf[0:2], l)
|
|
}
|
|
native.PutUint16(buf[2:4], a.Type)
|
|
return buf
|
|
}
|
|
|
|
type NetlinkRequest struct {
|
|
unix.NlMsghdr
|
|
Data []NetlinkRequestData
|
|
RawData []byte
|
|
Sockets map[int]*SocketHandle
|
|
}
|
|
|
|
// Serialize the Netlink Request into a byte array
|
|
func (req *NetlinkRequest) Serialize() []byte {
|
|
length := unix.SizeofNlMsghdr
|
|
dataBytes := make([][]byte, len(req.Data))
|
|
for i, data := range req.Data {
|
|
dataBytes[i] = data.Serialize()
|
|
length = length + len(dataBytes[i])
|
|
}
|
|
length += len(req.RawData)
|
|
|
|
req.Len = uint32(length)
|
|
b := make([]byte, length)
|
|
hdr := (*(*[unix.SizeofNlMsghdr]byte)(unsafe.Pointer(req)))[:]
|
|
next := unix.SizeofNlMsghdr
|
|
copy(b[0:next], hdr)
|
|
for _, data := range dataBytes {
|
|
for _, dataByte := range data {
|
|
b[next] = dataByte
|
|
next = next + 1
|
|
}
|
|
}
|
|
// Add the raw data if any
|
|
if len(req.RawData) > 0 {
|
|
copy(b[next:length], req.RawData)
|
|
}
|
|
return b
|
|
}
|
|
|
|
func (req *NetlinkRequest) AddData(data NetlinkRequestData) {
|
|
req.Data = append(req.Data, data)
|
|
}
|
|
|
|
// AddRawData adds raw bytes to the end of the NetlinkRequest object during serialization
|
|
func (req *NetlinkRequest) AddRawData(data []byte) {
|
|
req.RawData = append(req.RawData, data...)
|
|
}
|
|
|
|
// Execute the request against a the given sockType.
|
|
// Returns a list of netlink messages in serialized format, optionally filtered
|
|
// by resType.
|
|
func (req *NetlinkRequest) Execute(sockType int, resType uint16) ([][]byte, error) {
|
|
var (
|
|
s *NetlinkSocket
|
|
err error
|
|
)
|
|
|
|
if req.Sockets != nil {
|
|
if sh, ok := req.Sockets[sockType]; ok {
|
|
s = sh.Socket
|
|
req.Seq = atomic.AddUint32(&sh.Seq, 1)
|
|
}
|
|
}
|
|
sharedSocket := s != nil
|
|
|
|
if s == nil {
|
|
s, err = getNetlinkSocket(sockType)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer s.Close()
|
|
} else {
|
|
s.Lock()
|
|
defer s.Unlock()
|
|
}
|
|
|
|
if err := s.Send(req); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
pid, err := s.GetPid()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var res [][]byte
|
|
|
|
done:
|
|
for {
|
|
msgs, err := s.Receive()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
for _, m := range msgs {
|
|
if m.Header.Seq != req.Seq {
|
|
if sharedSocket {
|
|
continue
|
|
}
|
|
return nil, fmt.Errorf("Wrong Seq nr %d, expected %d", m.Header.Seq, req.Seq)
|
|
}
|
|
if m.Header.Pid != pid {
|
|
return nil, fmt.Errorf("Wrong pid %d, expected %d", m.Header.Pid, pid)
|
|
}
|
|
if m.Header.Type == unix.NLMSG_DONE {
|
|
break done
|
|
}
|
|
if m.Header.Type == unix.NLMSG_ERROR {
|
|
native := NativeEndian()
|
|
error := int32(native.Uint32(m.Data[0:4]))
|
|
if error == 0 {
|
|
break done
|
|
}
|
|
return nil, syscall.Errno(-error)
|
|
}
|
|
if resType != 0 && m.Header.Type != resType {
|
|
continue
|
|
}
|
|
res = append(res, m.Data)
|
|
if m.Header.Flags&unix.NLM_F_MULTI == 0 {
|
|
break done
|
|
}
|
|
}
|
|
}
|
|
return res, nil
|
|
}
|
|
|
|
// Create a new netlink request from proto and flags
|
|
// Note the Len value will be inaccurate once data is added until
|
|
// the message is serialized
|
|
func NewNetlinkRequest(proto, flags int) *NetlinkRequest {
|
|
return &NetlinkRequest{
|
|
NlMsghdr: unix.NlMsghdr{
|
|
Len: uint32(unix.SizeofNlMsghdr),
|
|
Type: uint16(proto),
|
|
Flags: unix.NLM_F_REQUEST | uint16(flags),
|
|
Seq: atomic.AddUint32(&nextSeqNr, 1),
|
|
},
|
|
}
|
|
}
|
|
|
|
type NetlinkSocket struct {
|
|
fd int32
|
|
lsa unix.SockaddrNetlink
|
|
sync.Mutex
|
|
}
|
|
|
|
func getNetlinkSocket(protocol int) (*NetlinkSocket, error) {
|
|
fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW|unix.SOCK_CLOEXEC, protocol)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s := &NetlinkSocket{
|
|
fd: int32(fd),
|
|
}
|
|
s.lsa.Family = unix.AF_NETLINK
|
|
if err := unix.Bind(fd, &s.lsa); err != nil {
|
|
unix.Close(fd)
|
|
return nil, err
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
// GetNetlinkSocketAt opens a netlink socket in the network namespace newNs
|
|
// and positions the thread back into the network namespace specified by curNs,
|
|
// when done. If curNs is close, the function derives the current namespace and
|
|
// moves back into it when done. If newNs is close, the socket will be opened
|
|
// in the current network namespace.
|
|
func GetNetlinkSocketAt(newNs, curNs netns.NsHandle, protocol int) (*NetlinkSocket, error) {
|
|
c, err := executeInNetns(newNs, curNs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer c()
|
|
return getNetlinkSocket(protocol)
|
|
}
|
|
|
|
// executeInNetns sets execution of the code following this call to the
|
|
// network namespace newNs, then moves the thread back to curNs if open,
|
|
// otherwise to the current netns at the time the function was invoked
|
|
// In case of success, the caller is expected to execute the returned function
|
|
// at the end of the code that needs to be executed in the network namespace.
|
|
// Example:
|
|
// func jobAt(...) error {
|
|
// d, err := executeInNetns(...)
|
|
// if err != nil { return err}
|
|
// defer d()
|
|
// < code which needs to be executed in specific netns>
|
|
// }
|
|
// TODO: his function probably belongs to netns pkg.
|
|
func executeInNetns(newNs, curNs netns.NsHandle) (func(), error) {
|
|
var (
|
|
err error
|
|
moveBack func(netns.NsHandle) error
|
|
closeNs func() error
|
|
unlockThd func()
|
|
)
|
|
restore := func() {
|
|
// order matters
|
|
if moveBack != nil {
|
|
moveBack(curNs)
|
|
}
|
|
if closeNs != nil {
|
|
closeNs()
|
|
}
|
|
if unlockThd != nil {
|
|
unlockThd()
|
|
}
|
|
}
|
|
if newNs.IsOpen() {
|
|
runtime.LockOSThread()
|
|
unlockThd = runtime.UnlockOSThread
|
|
if !curNs.IsOpen() {
|
|
if curNs, err = netns.Get(); err != nil {
|
|
restore()
|
|
return nil, fmt.Errorf("could not get current namespace while creating netlink socket: %v", err)
|
|
}
|
|
closeNs = curNs.Close
|
|
}
|
|
if err := netns.Set(newNs); err != nil {
|
|
restore()
|
|
return nil, fmt.Errorf("failed to set into network namespace %d while creating netlink socket: %v", newNs, err)
|
|
}
|
|
moveBack = netns.Set
|
|
}
|
|
return restore, nil
|
|
}
|
|
|
|
// Create a netlink socket with a given protocol (e.g. NETLINK_ROUTE)
|
|
// and subscribe it to multicast groups passed in variable argument list.
|
|
// Returns the netlink socket on which Receive() method can be called
|
|
// to retrieve the messages from the kernel.
|
|
func Subscribe(protocol int, groups ...uint) (*NetlinkSocket, error) {
|
|
fd, err := unix.Socket(unix.AF_NETLINK, unix.SOCK_RAW, protocol)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
s := &NetlinkSocket{
|
|
fd: int32(fd),
|
|
}
|
|
s.lsa.Family = unix.AF_NETLINK
|
|
|
|
for _, g := range groups {
|
|
s.lsa.Groups |= (1 << (g - 1))
|
|
}
|
|
|
|
if err := unix.Bind(fd, &s.lsa); err != nil {
|
|
unix.Close(fd)
|
|
return nil, err
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
// SubscribeAt works like Subscribe plus let's the caller choose the network
|
|
// namespace in which the socket would be opened (newNs). Then control goes back
|
|
// to curNs if open, otherwise to the netns at the time this function was called.
|
|
func SubscribeAt(newNs, curNs netns.NsHandle, protocol int, groups ...uint) (*NetlinkSocket, error) {
|
|
c, err := executeInNetns(newNs, curNs)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer c()
|
|
return Subscribe(protocol, groups...)
|
|
}
|
|
|
|
func (s *NetlinkSocket) Close() {
|
|
fd := int(atomic.SwapInt32(&s.fd, -1))
|
|
unix.Close(fd)
|
|
}
|
|
|
|
func (s *NetlinkSocket) GetFd() int {
|
|
return int(atomic.LoadInt32(&s.fd))
|
|
}
|
|
|
|
func (s *NetlinkSocket) Send(request *NetlinkRequest) error {
|
|
fd := int(atomic.LoadInt32(&s.fd))
|
|
if fd < 0 {
|
|
return fmt.Errorf("Send called on a closed socket")
|
|
}
|
|
if err := unix.Sendto(fd, request.Serialize(), 0, &s.lsa); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func (s *NetlinkSocket) Receive() ([]syscall.NetlinkMessage, error) {
|
|
fd := int(atomic.LoadInt32(&s.fd))
|
|
if fd < 0 {
|
|
return nil, fmt.Errorf("Receive called on a closed socket")
|
|
}
|
|
var rb [RECEIVE_BUFFER_SIZE]byte
|
|
nr, _, err := unix.Recvfrom(fd, rb[:], 0)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if nr < unix.NLMSG_HDRLEN {
|
|
return nil, fmt.Errorf("Got short response from netlink")
|
|
}
|
|
rb2 := make([]byte, nr)
|
|
copy(rb2, rb[:nr])
|
|
return syscall.ParseNetlinkMessage(rb2)
|
|
}
|
|
|
|
// SetSendTimeout allows to set a send timeout on the socket
|
|
func (s *NetlinkSocket) SetSendTimeout(timeout *unix.Timeval) error {
|
|
// Set a send timeout of SOCKET_SEND_TIMEOUT, this will allow the Send to periodically unblock and avoid that a routine
|
|
// remains stuck on a send on a closed fd
|
|
return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_SNDTIMEO, timeout)
|
|
}
|
|
|
|
// SetReceiveTimeout allows to set a receive timeout on the socket
|
|
func (s *NetlinkSocket) SetReceiveTimeout(timeout *unix.Timeval) error {
|
|
// Set a read timeout of SOCKET_READ_TIMEOUT, this will allow the Read to periodically unblock and avoid that a routine
|
|
// remains stuck on a recvmsg on a closed fd
|
|
return unix.SetsockoptTimeval(int(s.fd), unix.SOL_SOCKET, unix.SO_RCVTIMEO, timeout)
|
|
}
|
|
|
|
func (s *NetlinkSocket) GetPid() (uint32, error) {
|
|
fd := int(atomic.LoadInt32(&s.fd))
|
|
lsa, err := unix.Getsockname(fd)
|
|
if err != nil {
|
|
return 0, err
|
|
}
|
|
switch v := lsa.(type) {
|
|
case *unix.SockaddrNetlink:
|
|
return v.Pid, nil
|
|
}
|
|
return 0, fmt.Errorf("Wrong socket type")
|
|
}
|
|
|
|
// ZeroTerminated returns the bytes of s followed by a single NUL byte, in a
// newly allocated slice of length len(s)+1.
func ZeroTerminated(s string) []byte {
	// make zero-fills the slice, so the trailing byte is already the NUL
	// terminator once the string has been copied in.
	b := make([]byte, len(s)+1)
	copy(b, s)
	return b
}
|
|
|
|
// NonZeroTerminated returns a copy of the bytes of s, without any
// terminator.
func NonZeroTerminated(s string) []byte {
	return []byte(s)
}
|
|
|
|
// BytesToString converts a NUL-terminated byte slice to a string, dropping
// the first NUL byte and everything after it. When b contains no NUL byte
// the whole slice is converted.
func BytesToString(b []byte) string {
	if n := bytes.IndexByte(b, 0); n >= 0 {
		return string(b[:n])
	}
	return string(b)
}
|
|
|
|
// Uint8Attr returns v encoded as a one-byte slice.
func Uint8Attr(v uint8) []byte {
	out := make([]byte, 1)
	out[0] = v
	return out
}
|
|
|
|
func Uint16Attr(v uint16) []byte {
|
|
native := NativeEndian()
|
|
bytes := make([]byte, 2)
|
|
native.PutUint16(bytes, v)
|
|
return bytes
|
|
}
|
|
|
|
func Uint32Attr(v uint32) []byte {
|
|
native := NativeEndian()
|
|
bytes := make([]byte, 4)
|
|
native.PutUint32(bytes, v)
|
|
return bytes
|
|
}
|
|
|
|
func Uint64Attr(v uint64) []byte {
|
|
native := NativeEndian()
|
|
bytes := make([]byte, 8)
|
|
native.PutUint64(bytes, v)
|
|
return bytes
|
|
}
|
|
|
|
func ParseRouteAttr(b []byte) ([]syscall.NetlinkRouteAttr, error) {
|
|
var attrs []syscall.NetlinkRouteAttr
|
|
for len(b) >= unix.SizeofRtAttr {
|
|
a, vbuf, alen, err := netlinkRouteAttrAndValue(b)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
ra := syscall.NetlinkRouteAttr{Attr: syscall.RtAttr(*a), Value: vbuf[:int(a.Len)-unix.SizeofRtAttr]}
|
|
attrs = append(attrs, ra)
|
|
b = b[alen:]
|
|
}
|
|
return attrs, nil
|
|
}
|
|
|
|
func netlinkRouteAttrAndValue(b []byte) (*unix.RtAttr, []byte, int, error) {
|
|
a := (*unix.RtAttr)(unsafe.Pointer(&b[0]))
|
|
if int(a.Len) < unix.SizeofRtAttr || int(a.Len) > len(b) {
|
|
return nil, nil, 0, unix.EINVAL
|
|
}
|
|
return a, b[unix.SizeofRtAttr:], rtaAlignOf(int(a.Len)), nil
|
|
}
|
|
|
|
// SocketHandle contains the netlink socket and the associated
|
|
// sequence counter for a specific netlink family
|
|
type SocketHandle struct {
|
|
Seq uint32
|
|
Socket *NetlinkSocket
|
|
}
|
|
|
|
// Close closes the netlink socket
|
|
func (sh *SocketHandle) Close() {
|
|
if sh.Socket != nil {
|
|
sh.Socket.Close()
|
|
}
|
|
}
|