mirror of
https://github.com/aljazceru/kata-containers.git
synced 2025-12-29 03:54:27 +01:00
Currently we sometimes pass it as a pointer and other times not. As a result, the view of sandbox across virtcontainers may not be the same and it costs extra memory copy each time we pass it by value. Fix it by ensuring sandbox is always passed by pointers. Fixes: #262 Signed-off-by: Peng Tao <bergwolf@gmail.com>
1374 lines
38 KiB
Go
1374 lines
38 KiB
Go
// Copyright (c) 2016 Intel Corporation
|
|
//
|
|
// SPDX-License-Identifier: Apache-2.0
|
|
//
|
|
|
|
package virtcontainers
|
|
|
|
import (
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"io/ioutil"
|
|
"math/rand"
|
|
"net"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/containernetworking/plugins/pkg/ns"
|
|
"github.com/kata-containers/runtime/virtcontainers/pkg/uuid"
|
|
"github.com/safchain/ethtool"
|
|
"github.com/sirupsen/logrus"
|
|
"github.com/vishvananda/netlink"
|
|
"github.com/vishvananda/netns"
|
|
"golang.org/x/sys/unix"
|
|
)
|
|
|
|
// NetInterworkingModel selects how a container network interface is
// connected to the virtual machine.
type NetInterworkingModel int

const (
	// NetXConnectDefaultModel asks for DefaultNetInterworkingModel to be used.
	NetXConnectDefaultModel NetInterworkingModel = iota

	// NetXConnectBridgedModel interconnects the container interface and
	// the VM through a linux bridge. It is the safe default that works
	// for most cases except macvlan and ipvlan.
	NetXConnectBridgedModel

	// NetXConnectMacVtapModel can be used when the container network
	// interface can be bridged using macvtap.
	NetXConnectMacVtapModel

	// NetXConnectEnlightenedModel can be used when the network plugins
	// are enlightened to create VM native interfaces when requested by
	// the runtime. This will be used for vethtap, macvtap, ipvtap.
	NetXConnectEnlightenedModel

	// NetXConnectInvalidModel is the sentinel marking the end of the
	// valid range; IsValid() compares against it.
	NetXConnectInvalidModel
)

// IsValid reports whether n lies in the range
// [NetXConnectDefaultModel, NetXConnectInvalidModel).
func (n NetInterworkingModel) IsValid() bool {
	return n >= NetXConnectDefaultModel && n < NetXConnectInvalidModel
}

// SetModel updates n from its string name. Accepted names are "default",
// "bridged", "macvtap" and "enlightened"; any other name yields an error
// and leaves n unchanged.
func (n *NetInterworkingModel) SetModel(modelName string) error {
	var model NetInterworkingModel

	switch modelName {
	case "default":
		model = DefaultNetInterworkingModel
	case "bridged":
		model = NetXConnectBridgedModel
	case "macvtap":
		model = NetXConnectMacVtapModel
	case "enlightened":
		model = NetXConnectEnlightenedModel
	default:
		return fmt.Errorf("Unknown type %s", modelName)
	}

	*n = model
	return nil
}

// DefaultNetInterworkingModel is the package level default that
// determines how the VM is connected to the container network
// interface when no explicit model is requested.
var DefaultNetInterworkingModel = NetXConnectMacVtapModel
|
|
|
|
// Constants shared by the networking code.
const (
	// Default route destination and its label.
	defaultRouteDest  = "0.0.0.0/0"
	defaultRouteLabel = "default"

	// Permission bits passed to os.OpenFile when opening tap/vhost devices.
	defaultFilePerms = 0600

	// Fallback transmit queue length for macvtap links.
	defaultQlen = 1500

	// Number of queues (and matching fds) set up per interface.
	defaultQueues = 8
)
|
|
|
|
// DNSInfo holds the DNS settings associated with a network interface:
// servers, domain, search list and options.
type DNSInfo struct {
	Servers  []string
	Domain   string
	Searches []string
	Options  []string
}
|
|
|
|
// NetlinkIface describes fully a network interface.
|
|
type NetlinkIface struct {
|
|
netlink.LinkAttrs
|
|
Type string
|
|
}
|
|
|
|
// NetworkInfo gathers all information related to a network interface.
|
|
// It can be used to store the description of the underlying network.
|
|
type NetworkInfo struct {
|
|
Iface NetlinkIface
|
|
Addrs []netlink.Addr
|
|
Routes []netlink.Route
|
|
DNS DNSInfo
|
|
}
|
|
|
|
// NetworkInterface defines a network interface.
|
|
type NetworkInterface struct {
|
|
Name string
|
|
HardAddr string
|
|
Addrs []netlink.Addr
|
|
}
|
|
|
|
// NetworkInterfacePair defines a pair between VM and virtual network interfaces.
|
|
type NetworkInterfacePair struct {
|
|
ID string
|
|
Name string
|
|
VirtIface NetworkInterface
|
|
TAPIface NetworkInterface
|
|
NetInterworkingModel
|
|
VMFds []*os.File
|
|
VhostFds []*os.File
|
|
}
|
|
|
|
// NetworkConfig is the network configuration related to a network.
|
|
type NetworkConfig struct {
|
|
NetNSPath string
|
|
NumInterfaces int
|
|
InterworkingModel NetInterworkingModel
|
|
}
|
|
|
|
// Endpoint represents a physical or virtual network interface.
|
|
type Endpoint interface {
|
|
Properties() NetworkInfo
|
|
Name() string
|
|
HardwareAddr() string
|
|
Type() EndpointType
|
|
|
|
SetProperties(NetworkInfo)
|
|
Attach(hypervisor) error
|
|
Detach() error
|
|
}
|
|
|
|
// VirtualEndpoint gathers a network pair and its properties.
|
|
type VirtualEndpoint struct {
|
|
NetPair NetworkInterfacePair
|
|
EndpointProperties NetworkInfo
|
|
Physical bool
|
|
EndpointType EndpointType
|
|
}
|
|
|
|
// PhysicalEndpoint gathers a physical network interface and its properties
|
|
type PhysicalEndpoint struct {
|
|
IfaceName string
|
|
HardAddr string
|
|
EndpointProperties NetworkInfo
|
|
EndpointType EndpointType
|
|
BDF string
|
|
Driver string
|
|
VendorDeviceID string
|
|
}
|
|
|
|
// VhostUserEndpoint represents a vhost-user socket based network interface
|
|
type VhostUserEndpoint struct {
|
|
// Path to the vhost-user socket on the host system
|
|
SocketPath string
|
|
// MAC address of the interface
|
|
HardAddr string
|
|
IfaceName string
|
|
EndpointProperties NetworkInfo
|
|
EndpointType EndpointType
|
|
}
|
|
|
|
// Properties returns properties for the veth interface in the network pair.
|
|
func (endpoint *VirtualEndpoint) Properties() NetworkInfo {
|
|
return endpoint.EndpointProperties
|
|
}
|
|
|
|
// Name returns name of the veth interface in the network pair.
|
|
func (endpoint *VirtualEndpoint) Name() string {
|
|
return endpoint.NetPair.VirtIface.Name
|
|
}
|
|
|
|
// HardwareAddr returns the mac address that is assigned to the tap interface
|
|
// in th network pair.
|
|
func (endpoint *VirtualEndpoint) HardwareAddr() string {
|
|
return endpoint.NetPair.TAPIface.HardAddr
|
|
}
|
|
|
|
// Type identifies the endpoint as a virtual endpoint.
|
|
func (endpoint *VirtualEndpoint) Type() EndpointType {
|
|
return endpoint.EndpointType
|
|
}
|
|
|
|
// SetProperties sets the properties for the endpoint.
|
|
func (endpoint *VirtualEndpoint) SetProperties(properties NetworkInfo) {
|
|
endpoint.EndpointProperties = properties
|
|
}
|
|
|
|
func networkLogger() *logrus.Entry {
|
|
return virtLog.WithField("subsystem", "network")
|
|
}
|
|
|
|
// Attach for virtual endpoint bridges the network pair and adds the
|
|
// tap interface of the network pair to the hypervisor.
|
|
func (endpoint *VirtualEndpoint) Attach(h hypervisor) error {
|
|
networkLogger().Info("Attaching virtual endpoint")
|
|
if err := xconnectVMNetwork(&(endpoint.NetPair), true); err != nil {
|
|
networkLogger().WithError(err).Error("Error bridging virtual ep")
|
|
return err
|
|
}
|
|
|
|
return h.addDevice(endpoint, netDev)
|
|
}
|
|
|
|
// Detach for the virtual endpoint tears down the tap and bridge
|
|
// created for the veth interface.
|
|
func (endpoint *VirtualEndpoint) Detach() error {
|
|
networkLogger().Info("Detaching virtual endpoint")
|
|
return xconnectVMNetwork(&(endpoint.NetPair), false)
|
|
}
|
|
|
|
// Properties returns the properties of the interface.
|
|
func (endpoint *VhostUserEndpoint) Properties() NetworkInfo {
|
|
return endpoint.EndpointProperties
|
|
}
|
|
|
|
// Name returns name of the interface.
|
|
func (endpoint *VhostUserEndpoint) Name() string {
|
|
return endpoint.IfaceName
|
|
}
|
|
|
|
// HardwareAddr returns the mac address of the vhostuser network interface
|
|
func (endpoint *VhostUserEndpoint) HardwareAddr() string {
|
|
return endpoint.HardAddr
|
|
}
|
|
|
|
// Type indentifies the endpoint as a vhostuser endpoint.
|
|
func (endpoint *VhostUserEndpoint) Type() EndpointType {
|
|
return endpoint.EndpointType
|
|
}
|
|
|
|
// SetProperties sets the properties of the endpoint.
|
|
func (endpoint *VhostUserEndpoint) SetProperties(properties NetworkInfo) {
|
|
endpoint.EndpointProperties = properties
|
|
}
|
|
|
|
// Attach for vhostuser endpoint
|
|
func (endpoint *VhostUserEndpoint) Attach(h hypervisor) error {
|
|
networkLogger().Info("Attaching vhostuser based endpoint")
|
|
|
|
// generate a unique ID to be used for hypervisor commandline fields
|
|
randBytes, err := generateRandomBytes(8)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
id := hex.EncodeToString(randBytes)
|
|
|
|
d := VhostUserNetDevice{
|
|
MacAddress: endpoint.HardAddr,
|
|
}
|
|
d.SocketPath = endpoint.SocketPath
|
|
d.ID = id
|
|
|
|
return h.addDevice(d, vhostuserDev)
|
|
}
|
|
|
|
// Detach for vhostuser endpoint
|
|
func (endpoint *VhostUserEndpoint) Detach() error {
|
|
networkLogger().Info("Detaching vhostuser based endpoint")
|
|
return nil
|
|
}
|
|
|
|
// Create a vhostuser endpoint
|
|
func createVhostUserEndpoint(netInfo NetworkInfo, socket string) (*VhostUserEndpoint, error) {
|
|
|
|
vhostUserEndpoint := &VhostUserEndpoint{
|
|
SocketPath: socket,
|
|
HardAddr: netInfo.Iface.HardwareAddr.String(),
|
|
IfaceName: netInfo.Iface.Name,
|
|
EndpointType: VhostUserEndpointType,
|
|
}
|
|
return vhostUserEndpoint, nil
|
|
}
|
|
|
|
// Properties returns the properties of the physical interface.
|
|
func (endpoint *PhysicalEndpoint) Properties() NetworkInfo {
|
|
return endpoint.EndpointProperties
|
|
}
|
|
|
|
// HardwareAddr returns the mac address of the physical network interface.
|
|
func (endpoint *PhysicalEndpoint) HardwareAddr() string {
|
|
return endpoint.HardAddr
|
|
}
|
|
|
|
// Name returns name of the physical interface.
|
|
func (endpoint *PhysicalEndpoint) Name() string {
|
|
return endpoint.IfaceName
|
|
}
|
|
|
|
// Type indentifies the endpoint as a physical endpoint.
|
|
func (endpoint *PhysicalEndpoint) Type() EndpointType {
|
|
return endpoint.EndpointType
|
|
}
|
|
|
|
// SetProperties sets the properties of the physical endpoint.
|
|
func (endpoint *PhysicalEndpoint) SetProperties(properties NetworkInfo) {
|
|
endpoint.EndpointProperties = properties
|
|
}
|
|
|
|
// Attach for physical endpoint binds the physical network interface to
|
|
// vfio-pci and adds device to the hypervisor with vfio-passthrough.
|
|
func (endpoint *PhysicalEndpoint) Attach(h hypervisor) error {
|
|
networkLogger().Info("Attaching physical endpoint")
|
|
|
|
// Unbind physical interface from host driver and bind to vfio
|
|
// so that it can be passed to qemu.
|
|
if err := bindNICToVFIO(endpoint); err != nil {
|
|
return err
|
|
}
|
|
|
|
d := VFIODevice{
|
|
BDF: endpoint.BDF,
|
|
}
|
|
|
|
return h.addDevice(d, vfioDev)
|
|
}
|
|
|
|
// Detach for physical endpoint unbinds the physical network interface from vfio-pci
|
|
// and binds it back to the saved host driver.
|
|
func (endpoint *PhysicalEndpoint) Detach() error {
|
|
// Bind back the physical network interface to host.
|
|
networkLogger().Info("Detaching physical endpoint")
|
|
return bindNICToHost(endpoint)
|
|
}
|
|
|
|
// EndpointType names the kind of a network endpoint.
type EndpointType string

const (
	// PhysicalEndpointType is the physical network interface.
	PhysicalEndpointType EndpointType = "physical"

	// VirtualEndpointType is the virtual network interface.
	VirtualEndpointType EndpointType = "virtual"

	// VhostUserEndpointType is the vhostuser network interface.
	VhostUserEndpointType EndpointType = "vhost-user"
)

// Set assigns the endpoint type named by value. Only "physical",
// "virtual" and "vhost-user" are accepted; anything else returns an
// error and leaves the receiver untouched.
func (endpointType *EndpointType) Set(value string) error {
	switch candidate := EndpointType(value); candidate {
	case PhysicalEndpointType, VirtualEndpointType, VhostUserEndpointType:
		*endpointType = candidate
		return nil
	}

	return fmt.Errorf("Unknown endpoint type %s", value)
}

// String returns the textual form of a known endpoint type, or the empty
// string when the value is not one of the declared constants.
func (endpointType *EndpointType) String() string {
	switch current := *endpointType; current {
	case PhysicalEndpointType, VirtualEndpointType, VhostUserEndpointType:
		return string(current)
	}

	return ""
}
|
|
|
|
// NetworkNamespace contains all data related to its network namespace.
|
|
type NetworkNamespace struct {
|
|
NetNsPath string
|
|
NetNsCreated bool
|
|
Endpoints []Endpoint
|
|
}
|
|
|
|
// TypedJSONEndpoint is used as an intermediate representation for
|
|
// marshalling and unmarshalling Endpoint objects.
|
|
type TypedJSONEndpoint struct {
|
|
Type EndpointType
|
|
Data json.RawMessage
|
|
}
|
|
|
|
// MarshalJSON is the custom NetworkNamespace JSON marshalling routine.
|
|
// This is needed to properly marshall Endpoints array.
|
|
func (n NetworkNamespace) MarshalJSON() ([]byte, error) {
|
|
// We need a shadow structure in order to prevent json from
|
|
// entering a recursive loop when only calling json.Marshal().
|
|
type shadow struct {
|
|
NetNsPath string
|
|
NetNsCreated bool
|
|
Endpoints []TypedJSONEndpoint
|
|
}
|
|
|
|
s := &shadow{
|
|
NetNsPath: n.NetNsPath,
|
|
NetNsCreated: n.NetNsCreated,
|
|
}
|
|
|
|
var typedEndpoints []TypedJSONEndpoint
|
|
for _, endpoint := range n.Endpoints {
|
|
tempJSON, _ := json.Marshal(endpoint)
|
|
|
|
t := TypedJSONEndpoint{
|
|
Type: endpoint.Type(),
|
|
Data: tempJSON,
|
|
}
|
|
|
|
typedEndpoints = append(typedEndpoints, t)
|
|
}
|
|
|
|
s.Endpoints = typedEndpoints
|
|
|
|
b, err := json.Marshal(s)
|
|
return b, err
|
|
}
|
|
|
|
// UnmarshalJSON is the custom NetworkNamespace unmarshalling routine.
|
|
// This is needed for unmarshalling the Endpoints interfaces array.
|
|
func (n *NetworkNamespace) UnmarshalJSON(b []byte) error {
|
|
var s struct {
|
|
NetNsPath string
|
|
NetNsCreated bool
|
|
Endpoints json.RawMessage
|
|
}
|
|
|
|
if err := json.Unmarshal(b, &s); err != nil {
|
|
return err
|
|
}
|
|
|
|
(*n).NetNsPath = s.NetNsPath
|
|
(*n).NetNsCreated = s.NetNsCreated
|
|
|
|
var typedEndpoints []TypedJSONEndpoint
|
|
if err := json.Unmarshal([]byte(string(s.Endpoints)), &typedEndpoints); err != nil {
|
|
return err
|
|
}
|
|
|
|
var endpoints []Endpoint
|
|
|
|
for _, e := range typedEndpoints {
|
|
switch e.Type {
|
|
case PhysicalEndpointType:
|
|
var endpoint PhysicalEndpoint
|
|
err := json.Unmarshal(e.Data, &endpoint)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
endpoints = append(endpoints, &endpoint)
|
|
virtLog.Infof("Physical endpoint unmarshalled [%v]", endpoint)
|
|
|
|
case VirtualEndpointType:
|
|
var endpoint VirtualEndpoint
|
|
err := json.Unmarshal(e.Data, &endpoint)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
endpoints = append(endpoints, &endpoint)
|
|
virtLog.Infof("Virtual endpoint unmarshalled [%v]", endpoint)
|
|
|
|
case VhostUserEndpointType:
|
|
var endpoint VhostUserEndpoint
|
|
err := json.Unmarshal(e.Data, &endpoint)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
endpoints = append(endpoints, &endpoint)
|
|
virtLog.Infof("VhostUser endpoint unmarshalled [%v]", endpoint)
|
|
|
|
default:
|
|
virtLog.Errorf("Unknown endpoint type received %s\n", e.Type)
|
|
}
|
|
}
|
|
|
|
(*n).Endpoints = endpoints
|
|
return nil
|
|
}
|
|
|
|
// NetworkModel names the type of network specification.
type NetworkModel string

const (
	// NoopNetworkModel is the No-Op network.
	NoopNetworkModel NetworkModel = "noop"

	// CNINetworkModel is the CNI network.
	CNINetworkModel NetworkModel = "CNI"

	// CNMNetworkModel is the CNM network.
	CNMNetworkModel NetworkModel = "CNM"
)

// Set assigns the network model named by value. Only "noop", "CNI" and
// "CNM" (case sensitive) are accepted; anything else returns an error
// and leaves the receiver untouched.
func (networkType *NetworkModel) Set(value string) error {
	switch candidate := NetworkModel(value); candidate {
	case NoopNetworkModel, CNINetworkModel, CNMNetworkModel:
		*networkType = candidate
		return nil
	}

	return fmt.Errorf("Unknown network type %s", value)
}

// String returns the textual form of a known network model, or the empty
// string when the value is not one of the declared constants.
func (networkType *NetworkModel) String() string {
	switch current := *networkType; current {
	case NoopNetworkModel, CNINetworkModel, CNMNetworkModel:
		return string(current)
	}

	return ""
}
|
|
|
|
// newNetwork returns a network from a network type.
|
|
func newNetwork(networkType NetworkModel) network {
|
|
switch networkType {
|
|
case NoopNetworkModel:
|
|
return &noopNetwork{}
|
|
case CNINetworkModel:
|
|
return &cni{}
|
|
case CNMNetworkModel:
|
|
return &cnm{}
|
|
default:
|
|
return &noopNetwork{}
|
|
}
|
|
}
|
|
|
|
func initNetworkCommon(config NetworkConfig) (string, bool, error) {
|
|
if !config.InterworkingModel.IsValid() || config.InterworkingModel == NetXConnectDefaultModel {
|
|
config.InterworkingModel = DefaultNetInterworkingModel
|
|
}
|
|
|
|
if config.NetNSPath == "" {
|
|
path, err := createNetNS()
|
|
if err != nil {
|
|
return "", false, err
|
|
}
|
|
|
|
return path, true, nil
|
|
}
|
|
|
|
return config.NetNSPath, false, nil
|
|
}
|
|
|
|
func runNetworkCommon(networkNSPath string, cb func() error) error {
|
|
if networkNSPath == "" {
|
|
return fmt.Errorf("networkNSPath cannot be empty")
|
|
}
|
|
|
|
return doNetNS(networkNSPath, func(_ ns.NetNS) error {
|
|
return cb()
|
|
})
|
|
}
|
|
|
|
func addNetworkCommon(sandbox *Sandbox, networkNS *NetworkNamespace) error {
|
|
err := doNetNS(networkNS.NetNsPath, func(_ ns.NetNS) error {
|
|
for _, endpoint := range networkNS.Endpoints {
|
|
if err := endpoint.Attach(sandbox.hypervisor); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
return err
|
|
}
|
|
|
|
func removeNetworkCommon(networkNS NetworkNamespace) error {
|
|
return doNetNS(networkNS.NetNsPath, func(_ ns.NetNS) error {
|
|
for _, endpoint := range networkNS.Endpoints {
|
|
if err := endpoint.Detach(); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
func createLink(netHandle *netlink.Handle, name string, expectedLink netlink.Link) (netlink.Link, []*os.File, error) {
|
|
var newLink netlink.Link
|
|
var fds []*os.File
|
|
|
|
switch expectedLink.Type() {
|
|
case (&netlink.Bridge{}).Type():
|
|
newLink = &netlink.Bridge{
|
|
LinkAttrs: netlink.LinkAttrs{Name: name},
|
|
MulticastSnooping: expectedLink.(*netlink.Bridge).MulticastSnooping,
|
|
}
|
|
case (&netlink.Tuntap{}).Type():
|
|
newLink = &netlink.Tuntap{
|
|
LinkAttrs: netlink.LinkAttrs{Name: name},
|
|
Mode: netlink.TUNTAP_MODE_TAP,
|
|
Queues: defaultQueues,
|
|
Flags: netlink.TUNTAP_MULTI_QUEUE_DEFAULTS | netlink.TUNTAP_VNET_HDR,
|
|
}
|
|
case (&netlink.Macvtap{}).Type():
|
|
qlen := expectedLink.Attrs().TxQLen
|
|
if qlen <= 0 {
|
|
qlen = defaultQlen
|
|
}
|
|
newLink = &netlink.Macvtap{
|
|
Macvlan: netlink.Macvlan{
|
|
Mode: netlink.MACVLAN_MODE_BRIDGE,
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
Index: expectedLink.Attrs().Index,
|
|
Name: name,
|
|
TxQLen: qlen,
|
|
ParentIndex: expectedLink.Attrs().ParentIndex,
|
|
},
|
|
},
|
|
}
|
|
default:
|
|
return nil, fds, fmt.Errorf("Unsupported link type %s", expectedLink.Type())
|
|
}
|
|
|
|
if err := netHandle.LinkAdd(newLink); err != nil {
|
|
return nil, fds, fmt.Errorf("LinkAdd() failed for %s name %s: %s", expectedLink.Type(), name, err)
|
|
}
|
|
|
|
tuntapLink, ok := newLink.(*netlink.Tuntap)
|
|
if ok {
|
|
fds = tuntapLink.Fds
|
|
}
|
|
|
|
newLink, err := getLinkByName(netHandle, name, expectedLink)
|
|
return newLink, fds, err
|
|
}
|
|
|
|
func getLinkByName(netHandle *netlink.Handle, name string, expectedLink netlink.Link) (netlink.Link, error) {
|
|
link, err := netHandle.LinkByName(name)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("LinkByName() failed for %s name %s: %s", expectedLink.Type(), name, err)
|
|
}
|
|
|
|
switch expectedLink.Type() {
|
|
case (&netlink.Bridge{}).Type():
|
|
if l, ok := link.(*netlink.Bridge); ok {
|
|
return l, nil
|
|
}
|
|
case (&netlink.Tuntap{}).Type():
|
|
if l, ok := link.(*netlink.GenericLink); ok {
|
|
return l, nil
|
|
}
|
|
case (&netlink.Veth{}).Type():
|
|
if l, ok := link.(*netlink.Veth); ok {
|
|
return l, nil
|
|
}
|
|
case (&netlink.Macvtap{}).Type():
|
|
if l, ok := link.(*netlink.Macvtap); ok {
|
|
return l, nil
|
|
}
|
|
default:
|
|
return nil, fmt.Errorf("Unsupported link type %s", expectedLink.Type())
|
|
}
|
|
|
|
return nil, fmt.Errorf("Incorrect link type %s, expecting %s", link.Type(), expectedLink.Type())
|
|
}
|
|
|
|
// The endpoint type should dictate how the connection needs to be made
|
|
func xconnectVMNetwork(netPair *NetworkInterfacePair, connect bool) error {
|
|
if netPair.NetInterworkingModel == NetXConnectDefaultModel {
|
|
netPair.NetInterworkingModel = DefaultNetInterworkingModel
|
|
}
|
|
switch netPair.NetInterworkingModel {
|
|
case NetXConnectBridgedModel:
|
|
netPair.NetInterworkingModel = NetXConnectBridgedModel
|
|
if connect {
|
|
return bridgeNetworkPair(netPair)
|
|
}
|
|
return unBridgeNetworkPair(*netPair)
|
|
case NetXConnectMacVtapModel:
|
|
netPair.NetInterworkingModel = NetXConnectMacVtapModel
|
|
if connect {
|
|
return tapNetworkPair(netPair)
|
|
}
|
|
return untapNetworkPair(*netPair)
|
|
case NetXConnectEnlightenedModel:
|
|
return fmt.Errorf("Unsupported networking model")
|
|
default:
|
|
return fmt.Errorf("Invalid internetworking model")
|
|
}
|
|
}
|
|
|
|
func createMacvtapFds(linkIndex int, queues int) ([]*os.File, error) {
|
|
tapDev := fmt.Sprintf("/dev/tap%d", linkIndex)
|
|
return createFds(tapDev, queues)
|
|
}
|
|
|
|
func createVhostFds(numFds int) ([]*os.File, error) {
|
|
vhostDev := "/dev/vhost-net"
|
|
return createFds(vhostDev, numFds)
|
|
}
|
|
|
|
func createFds(device string, numFds int) ([]*os.File, error) {
|
|
fds := make([]*os.File, numFds)
|
|
|
|
for i := 0; i < numFds; i++ {
|
|
f, err := os.OpenFile(device, os.O_RDWR, defaultFilePerms)
|
|
if err != nil {
|
|
cleanupFds(fds, i)
|
|
return nil, err
|
|
}
|
|
fds[i] = f
|
|
}
|
|
return fds, nil
|
|
}
|
|
|
|
// There is a limitation in the linux kernel that prevents a
// macvtap/macvlan link from getting the correct link index when created
// in a network namespace:
// https://github.com/clearcontainers/runtime/issues/708
//
// Until that bug is fixed we need to pick a random, non conflicting
// index and try to create a link with it. If that fails, we need to try
// with another one. Also, the kernel does not check whether the link id
// conflicts with a link id on the host, hence we need to offset the link
// id to prevent any overlap with the host indexes.
//
// Here the kernel will ensure that there is no race condition.
const (
	hostLinkOffset    = 8192   // Host should not have more than 8k interfaces
	linkRange         = 0xFFFF // This will allow up to 2^16 containers
	linkRetries       = 128    // Number of times we try to find a non conflicting index
	macvtapWorkaround = true
)
|
|
|
|
func createMacVtap(netHandle *netlink.Handle, name string, link netlink.Link) (taplink netlink.Link, err error) {
|
|
|
|
if !macvtapWorkaround {
|
|
taplink, _, err = createLink(netHandle, name, link)
|
|
return
|
|
}
|
|
|
|
r := rand.New(rand.NewSource(time.Now().UnixNano()))
|
|
|
|
for i := 0; i < linkRetries; i++ {
|
|
index := hostLinkOffset + (r.Int() & linkRange)
|
|
link.Attrs().Index = index
|
|
taplink, _, err = createLink(netHandle, name, link)
|
|
if err == nil {
|
|
break
|
|
}
|
|
}
|
|
|
|
return
|
|
}
|
|
|
|
func clearIPs(link netlink.Link, addrs []netlink.Addr) error {
|
|
for _, addr := range addrs {
|
|
if err := netlink.AddrDel(link, &addr); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func setIPs(link netlink.Link, addrs []netlink.Addr) error {
|
|
for _, addr := range addrs {
|
|
if err := netlink.AddrAdd(link, &addr); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
func tapNetworkPair(netPair *NetworkInterfacePair) error {
|
|
netHandle, err := netlink.NewHandle()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer netHandle.Delete()
|
|
|
|
vethLink, err := getLinkByName(netHandle, netPair.VirtIface.Name, &netlink.Veth{})
|
|
if err != nil {
|
|
return fmt.Errorf("Could not get veth interface: %s: %s", netPair.VirtIface.Name, err)
|
|
}
|
|
vethLinkAttrs := vethLink.Attrs()
|
|
|
|
// Attach the macvtap interface to the underlying container
|
|
// interface. Also picks relevant attributes from the parent
|
|
tapLink, err := createMacVtap(netHandle, netPair.TAPIface.Name,
|
|
&netlink.Macvtap{
|
|
Macvlan: netlink.Macvlan{
|
|
LinkAttrs: netlink.LinkAttrs{
|
|
TxQLen: vethLinkAttrs.TxQLen,
|
|
ParentIndex: vethLinkAttrs.Index,
|
|
},
|
|
},
|
|
})
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("Could not create TAP interface: %s", err)
|
|
}
|
|
|
|
// Save the veth MAC address to the TAP so that it can later be used
|
|
// to build the hypervisor command line. This MAC address has to be
|
|
// the one inside the VM in order to avoid any firewall issues. The
|
|
// bridge created by the network plugin on the host actually expects
|
|
// to see traffic from this MAC address and not another one.
|
|
tapHardAddr := vethLinkAttrs.HardwareAddr
|
|
netPair.TAPIface.HardAddr = vethLinkAttrs.HardwareAddr.String()
|
|
|
|
if err := netHandle.LinkSetMTU(tapLink, vethLinkAttrs.MTU); err != nil {
|
|
return fmt.Errorf("Could not set TAP MTU %d: %s", vethLinkAttrs.MTU, err)
|
|
}
|
|
|
|
hardAddr, err := net.ParseMAC(netPair.VirtIface.HardAddr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := netHandle.LinkSetHardwareAddr(vethLink, hardAddr); err != nil {
|
|
return fmt.Errorf("Could not set MAC address %s for veth interface %s: %s",
|
|
netPair.VirtIface.HardAddr, netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetHardwareAddr(tapLink, tapHardAddr); err != nil {
|
|
return fmt.Errorf("Could not set MAC address %s for veth interface %s: %s",
|
|
netPair.VirtIface.HardAddr, netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetUp(tapLink); err != nil {
|
|
return fmt.Errorf("Could not enable TAP %s: %s", netPair.TAPIface.Name, err)
|
|
}
|
|
|
|
// Clear the IP addresses from the veth interface to prevent ARP conflict
|
|
netPair.VirtIface.Addrs, err = netlink.AddrList(vethLink, netlink.FAMILY_V4)
|
|
if err != nil {
|
|
return fmt.Errorf("Unable to obtain veth IP addresses: %s", err)
|
|
}
|
|
|
|
if err := clearIPs(vethLink, netPair.VirtIface.Addrs); err != nil {
|
|
return fmt.Errorf("Unable to clear veth IP addresses: %s", err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetUp(vethLink); err != nil {
|
|
return fmt.Errorf("Could not enable veth %s: %s", netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
// Note: The underlying interfaces need to be up prior to fd creation.
|
|
|
|
// Setup the multiqueue fds to be consumed by QEMU as macvtap cannot
|
|
// be directly connected.
|
|
// Ideally we want
|
|
// netdev.FDs, err = createMacvtapFds(netdev.ID, int(config.SMP.CPUs))
|
|
|
|
// We do not have global context here, hence a manifest constant
|
|
// that matches our minimum vCPU configuration
|
|
// Another option is to defer this to ciao qemu library which does have
|
|
// global context but cannot handle errors when setting up the network
|
|
netPair.VMFds, err = createMacvtapFds(tapLink.Attrs().Index, defaultQueues)
|
|
if err != nil {
|
|
return fmt.Errorf("Could not setup macvtap fds %s: %s", netPair.TAPIface, err)
|
|
}
|
|
|
|
vhostFds, err := createVhostFds(defaultQueues)
|
|
if err != nil {
|
|
return fmt.Errorf("Could not setup vhost fds %s : %s", netPair.VirtIface.Name, err)
|
|
}
|
|
netPair.VhostFds = vhostFds
|
|
|
|
return nil
|
|
}
|
|
|
|
func bridgeNetworkPair(netPair *NetworkInterfacePair) error {
|
|
netHandle, err := netlink.NewHandle()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer netHandle.Delete()
|
|
|
|
tapLink, fds, err := createLink(netHandle, netPair.TAPIface.Name, &netlink.Tuntap{})
|
|
if err != nil {
|
|
return fmt.Errorf("Could not create TAP interface: %s", err)
|
|
}
|
|
netPair.VMFds = fds
|
|
|
|
vhostFds, err := createVhostFds(defaultQueues)
|
|
if err != nil {
|
|
return fmt.Errorf("Could not setup vhost fds %s : %s", netPair.VirtIface.Name, err)
|
|
}
|
|
netPair.VhostFds = vhostFds
|
|
|
|
vethLink, err := getLinkByName(netHandle, netPair.VirtIface.Name, &netlink.Veth{})
|
|
if err != nil {
|
|
return fmt.Errorf("Could not get veth interface %s : %s", netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
vethLinkAttrs := vethLink.Attrs()
|
|
|
|
// Save the veth MAC address to the TAP so that it can later be used
|
|
// to build the hypervisor command line. This MAC address has to be
|
|
// the one inside the VM in order to avoid any firewall issues. The
|
|
// bridge created by the network plugin on the host actually expects
|
|
// to see traffic from this MAC address and not another one.
|
|
netPair.TAPIface.HardAddr = vethLinkAttrs.HardwareAddr.String()
|
|
|
|
if err := netHandle.LinkSetMTU(tapLink, vethLinkAttrs.MTU); err != nil {
|
|
return fmt.Errorf("Could not set TAP MTU %d: %s", vethLinkAttrs.MTU, err)
|
|
}
|
|
|
|
hardAddr, err := net.ParseMAC(netPair.VirtIface.HardAddr)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if err := netHandle.LinkSetHardwareAddr(vethLink, hardAddr); err != nil {
|
|
return fmt.Errorf("Could not set MAC address %s for veth interface %s: %s",
|
|
netPair.VirtIface.HardAddr, netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
mcastSnoop := false
|
|
bridgeLink, _, err := createLink(netHandle, netPair.Name, &netlink.Bridge{MulticastSnooping: &mcastSnoop})
|
|
if err != nil {
|
|
return fmt.Errorf("Could not create bridge: %s", err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetMaster(tapLink, bridgeLink.(*netlink.Bridge)); err != nil {
|
|
return fmt.Errorf("Could not attach TAP %s to the bridge %s: %s",
|
|
netPair.TAPIface.Name, netPair.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetUp(tapLink); err != nil {
|
|
return fmt.Errorf("Could not enable TAP %s: %s", netPair.TAPIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetMaster(vethLink, bridgeLink.(*netlink.Bridge)); err != nil {
|
|
return fmt.Errorf("Could not attach veth %s to the bridge %s: %s",
|
|
netPair.VirtIface.Name, netPair.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetUp(vethLink); err != nil {
|
|
return fmt.Errorf("Could not enable veth %s: %s", netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetUp(bridgeLink); err != nil {
|
|
return fmt.Errorf("Could not enable bridge %s: %s", netPair.Name, err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func untapNetworkPair(netPair NetworkInterfacePair) error {
|
|
netHandle, err := netlink.NewHandle()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer netHandle.Delete()
|
|
|
|
tapLink, err := getLinkByName(netHandle, netPair.TAPIface.Name, &netlink.Macvtap{})
|
|
if err != nil {
|
|
return fmt.Errorf("Could not get TAP interface %s: %s", netPair.TAPIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkDel(tapLink); err != nil {
|
|
return fmt.Errorf("Could not remove TAP %s: %s", netPair.TAPIface.Name, err)
|
|
}
|
|
|
|
vethLink, err := getLinkByName(netHandle, netPair.VirtIface.Name, &netlink.Veth{})
|
|
if err != nil {
|
|
// The veth pair is not totally managed by virtcontainers
|
|
virtLog.Warnf("Could not get veth interface %s: %s", netPair.VirtIface.Name, err)
|
|
} else {
|
|
if err := netHandle.LinkSetDown(vethLink); err != nil {
|
|
return fmt.Errorf("Could not disable veth %s: %s", netPair.VirtIface.Name, err)
|
|
}
|
|
}
|
|
|
|
// Restore the IPs that were cleared
|
|
err = setIPs(vethLink, netPair.VirtIface.Addrs)
|
|
return err
|
|
}
|
|
|
|
func unBridgeNetworkPair(netPair NetworkInterfacePair) error {
|
|
netHandle, err := netlink.NewHandle()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer netHandle.Delete()
|
|
|
|
tapLink, err := getLinkByName(netHandle, netPair.TAPIface.Name, &netlink.Tuntap{})
|
|
if err != nil {
|
|
return fmt.Errorf("Could not get TAP interface: %s", err)
|
|
}
|
|
|
|
bridgeLink, err := getLinkByName(netHandle, netPair.Name, &netlink.Bridge{})
|
|
if err != nil {
|
|
return fmt.Errorf("Could not get bridge interface: %s", err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetDown(bridgeLink); err != nil {
|
|
return fmt.Errorf("Could not disable bridge %s: %s", netPair.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetDown(tapLink); err != nil {
|
|
return fmt.Errorf("Could not disable TAP %s: %s", netPair.TAPIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetNoMaster(tapLink); err != nil {
|
|
return fmt.Errorf("Could not detach TAP %s: %s", netPair.TAPIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkDel(bridgeLink); err != nil {
|
|
return fmt.Errorf("Could not remove bridge %s: %s", netPair.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkDel(tapLink); err != nil {
|
|
return fmt.Errorf("Could not remove TAP %s: %s", netPair.TAPIface.Name, err)
|
|
}
|
|
|
|
vethLink, err := getLinkByName(netHandle, netPair.VirtIface.Name, &netlink.Veth{})
|
|
if err != nil {
|
|
// The veth pair is not totally managed by virtcontainers
|
|
virtLog.WithError(err).Warn("Could not get veth interface")
|
|
} else {
|
|
if err := netHandle.LinkSetDown(vethLink); err != nil {
|
|
return fmt.Errorf("Could not disable veth %s: %s", netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
if err := netHandle.LinkSetNoMaster(vethLink); err != nil {
|
|
return fmt.Errorf("Could not detach veth %s: %s", netPair.VirtIface.Name, err)
|
|
}
|
|
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func createNetNS() (string, error) {
|
|
n, err := ns.NewNS()
|
|
if err != nil {
|
|
return "", err
|
|
}
|
|
|
|
return n.Path(), nil
|
|
}
|
|
|
|
// doNetNS is free from any call to a go routine, and it calls
|
|
// into runtime.LockOSThread(), meaning it won't be executed in a
|
|
// different thread than the one expected by the caller.
|
|
func doNetNS(netNSPath string, cb func(ns.NetNS) error) error {
|
|
runtime.LockOSThread()
|
|
defer runtime.UnlockOSThread()
|
|
|
|
currentNS, err := ns.GetCurrentNS()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer currentNS.Close()
|
|
|
|
targetNS, err := ns.GetNS(netNSPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if err := targetNS.Set(); err != nil {
|
|
return err
|
|
}
|
|
defer currentNS.Set()
|
|
|
|
return cb(targetNS)
|
|
}
|
|
|
|
func deleteNetNS(netNSPath string, mounted bool) error {
|
|
n, err := ns.GetNS(netNSPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = n.Close()
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// This unmount part is supposed to be done in the cni/ns package, but the "mounted"
|
|
// flag is not updated when retrieving NetNs handler from GetNS().
|
|
if mounted {
|
|
if err = unix.Unmount(netNSPath, unix.MNT_DETACH); err != nil {
|
|
return fmt.Errorf("Failed to unmount namespace %s: %v", netNSPath, err)
|
|
}
|
|
if err := os.RemoveAll(netNSPath); err != nil {
|
|
return fmt.Errorf("Failed to clean up namespace %s: %v", netNSPath, err)
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func createVirtualNetworkEndpoint(idx int, ifName string, interworkingModel NetInterworkingModel) (*VirtualEndpoint, error) {
|
|
if idx < 0 {
|
|
return &VirtualEndpoint{}, fmt.Errorf("invalid network endpoint index: %d", idx)
|
|
}
|
|
|
|
uniqueID := uuid.Generate().String()
|
|
|
|
hardAddr := net.HardwareAddr{0x02, 0x00, 0xCA, 0xFE, byte(idx >> 8), byte(idx)}
|
|
|
|
endpoint := &VirtualEndpoint{
|
|
// TODO This is too specific. We may need to create multiple
|
|
// end point types here and then decide how to connect them
|
|
// at the time of hypervisor attach and not here
|
|
NetPair: NetworkInterfacePair{
|
|
ID: uniqueID,
|
|
Name: fmt.Sprintf("br%d", idx),
|
|
VirtIface: NetworkInterface{
|
|
Name: fmt.Sprintf("eth%d", idx),
|
|
HardAddr: hardAddr.String(),
|
|
},
|
|
TAPIface: NetworkInterface{
|
|
Name: fmt.Sprintf("tap%d", idx),
|
|
},
|
|
NetInterworkingModel: interworkingModel,
|
|
},
|
|
EndpointType: VirtualEndpointType,
|
|
}
|
|
|
|
if ifName != "" {
|
|
endpoint.NetPair.VirtIface.Name = ifName
|
|
}
|
|
|
|
return endpoint, nil
|
|
}
|
|
|
|
func networkInfoFromLink(handle *netlink.Handle, link netlink.Link) (NetworkInfo, error) {
|
|
addrs, err := handle.AddrList(link, netlink.FAMILY_ALL)
|
|
if err != nil {
|
|
return NetworkInfo{}, err
|
|
}
|
|
|
|
routes, err := handle.RouteList(link, netlink.FAMILY_ALL)
|
|
if err != nil {
|
|
return NetworkInfo{}, err
|
|
}
|
|
|
|
return NetworkInfo{
|
|
Iface: NetlinkIface{
|
|
LinkAttrs: *(link.Attrs()),
|
|
Type: link.Type(),
|
|
},
|
|
Addrs: addrs,
|
|
Routes: routes,
|
|
}, nil
|
|
}
|
|
|
|
func createEndpointsFromScan(networkNSPath string, config NetworkConfig) ([]Endpoint, error) {
|
|
var endpoints []Endpoint
|
|
|
|
netnsHandle, err := netns.GetFromPath(networkNSPath)
|
|
if err != nil {
|
|
return []Endpoint{}, err
|
|
}
|
|
defer netnsHandle.Close()
|
|
|
|
netlinkHandle, err := netlink.NewHandleAt(netnsHandle)
|
|
if err != nil {
|
|
return []Endpoint{}, err
|
|
}
|
|
defer netlinkHandle.Delete()
|
|
|
|
linkList, err := netlinkHandle.LinkList()
|
|
if err != nil {
|
|
return []Endpoint{}, err
|
|
}
|
|
|
|
idx := 0
|
|
for _, link := range linkList {
|
|
var endpoint Endpoint
|
|
|
|
netInfo, err := networkInfoFromLink(netlinkHandle, link)
|
|
if err != nil {
|
|
return []Endpoint{}, err
|
|
}
|
|
|
|
// Ignore unconfigured network interfaces. These are
|
|
// either base tunnel devices that are not namespaced
|
|
// like gre0, gretap0, sit0, ipip0, tunl0 or incorrectly
|
|
// setup interfaces.
|
|
if len(netInfo.Addrs) == 0 {
|
|
continue
|
|
}
|
|
|
|
// Skip any loopback interfaces:
|
|
if (netInfo.Iface.Flags & net.FlagLoopback) != 0 {
|
|
continue
|
|
}
|
|
|
|
if err := doNetNS(networkNSPath, func(_ ns.NetNS) error {
|
|
|
|
// TODO: This is the incoming interface
|
|
// based on the incoming interface we should create
|
|
// an appropriate EndPoint based on interface type
|
|
// This should be a switch
|
|
|
|
// Check if interface is a physical interface. Do not create
|
|
// tap interface/bridge if it is.
|
|
isPhysical, err := isPhysicalIface(netInfo.Iface.Name)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if isPhysical {
|
|
cnmLogger().WithField("interface", netInfo.Iface.Name).Info("Physical network interface found")
|
|
endpoint, err = createPhysicalEndpoint(netInfo)
|
|
} else {
|
|
var socketPath string
|
|
|
|
// Check if this is a dummy interface which has a vhost-user socket associated with it
|
|
socketPath, err = vhostUserSocketPath(netInfo)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
if socketPath != "" {
|
|
cnmLogger().WithField("interface", netInfo.Iface.Name).Info("VhostUser network interface found")
|
|
endpoint, err = createVhostUserEndpoint(netInfo, socketPath)
|
|
} else {
|
|
endpoint, err = createVirtualNetworkEndpoint(idx, netInfo.Iface.Name, config.InterworkingModel)
|
|
}
|
|
}
|
|
|
|
return err
|
|
}); err != nil {
|
|
return []Endpoint{}, err
|
|
}
|
|
|
|
endpoint.SetProperties(netInfo)
|
|
endpoints = append(endpoints, endpoint)
|
|
|
|
idx++
|
|
}
|
|
|
|
return endpoints, nil
|
|
}
|
|
|
|
// isPhysicalIface checks if an interface is a physical device.
|
|
// We use ethtool here to not rely on device sysfs inside the network namespace.
|
|
func isPhysicalIface(ifaceName string) (bool, error) {
|
|
if ifaceName == "lo" {
|
|
return false, nil
|
|
}
|
|
|
|
ethHandle, err := ethtool.NewEthtool()
|
|
if err != nil {
|
|
return false, err
|
|
}
|
|
|
|
bus, err := ethHandle.BusInfo(ifaceName)
|
|
if err != nil {
|
|
return false, nil
|
|
}
|
|
|
|
// Check for a pci bus format
|
|
tokens := strings.Split(bus, ":")
|
|
if len(tokens) != 3 {
|
|
return false, nil
|
|
}
|
|
|
|
return true, nil
|
|
}
|
|
|
|
// sysPCIDevicesPath is the sysfs directory holding one entry per PCI device;
// createPhysicalEndpoint reads the "device" and "vendor" files below it.
var sysPCIDevicesPath = "/sys/bus/pci/devices"
|
|
|
|
func createPhysicalEndpoint(netInfo NetworkInfo) (*PhysicalEndpoint, error) {
|
|
// Get ethtool handle to derive driver and bus
|
|
ethHandle, err := ethtool.NewEthtool()
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get BDF
|
|
bdf, err := ethHandle.BusInfo(netInfo.Iface.Name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get Driver
|
|
driver, err := ethHandle.DriverName(netInfo.Iface.Name)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Get vendor and device id from pci space (sys/bus/pci/devices/$bdf)
|
|
|
|
ifaceDevicePath := filepath.Join(sysPCIDevicesPath, bdf, "device")
|
|
contents, err := ioutil.ReadFile(ifaceDevicePath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
deviceID := strings.TrimSpace(string(contents))
|
|
|
|
// Vendor id
|
|
ifaceVendorPath := filepath.Join(sysPCIDevicesPath, bdf, "vendor")
|
|
contents, err = ioutil.ReadFile(ifaceVendorPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
vendorID := strings.TrimSpace(string(contents))
|
|
vendorDeviceID := fmt.Sprintf("%s %s", vendorID, deviceID)
|
|
vendorDeviceID = strings.TrimSpace(vendorDeviceID)
|
|
|
|
physicalEndpoint := &PhysicalEndpoint{
|
|
IfaceName: netInfo.Iface.Name,
|
|
HardAddr: netInfo.Iface.HardwareAddr.String(),
|
|
VendorDeviceID: vendorDeviceID,
|
|
EndpointType: PhysicalEndpointType,
|
|
Driver: driver,
|
|
BDF: bdf,
|
|
}
|
|
|
|
return physicalEndpoint, nil
|
|
}
|
|
|
|
func bindNICToVFIO(endpoint *PhysicalEndpoint) error {
|
|
return bindDevicetoVFIO(endpoint.BDF, endpoint.Driver, endpoint.VendorDeviceID)
|
|
}
|
|
|
|
func bindNICToHost(endpoint *PhysicalEndpoint) error {
|
|
return bindDevicetoHost(endpoint.BDF, endpoint.Driver, endpoint.VendorDeviceID)
|
|
}
|
|
|
|
// network is the virtcontainers network interface.
|
|
// Container network plugins are used to setup virtual network
|
|
// between VM netns and the host network physical interface.
|
|
type network interface {
|
|
// init initializes the network, setting a new network namespace.
|
|
init(config NetworkConfig) (string, bool, error)
|
|
|
|
// run runs a callback function in a specified network namespace.
|
|
run(networkNSPath string, cb func() error) error
|
|
|
|
// add adds all needed interfaces inside the network namespace.
|
|
add(sandbox *Sandbox, config NetworkConfig, netNsPath string, netNsCreated bool) (NetworkNamespace, error)
|
|
|
|
// remove unbridges and deletes TAP interfaces. It also removes virtual network
|
|
// interfaces and deletes the network namespace.
|
|
remove(sandbox *Sandbox, networkNS NetworkNamespace) error
|
|
}
|