Skip to content

Commit

Permalink
Release cce-network-v2/2.12.2
Browse files Browse the repository at this point in the history
  • Loading branch information
gola committed Jul 31, 2024
1 parent 774bb41 commit 08bf1c2
Show file tree
Hide file tree
Showing 9 changed files with 136 additions and 39 deletions.
1 change: 1 addition & 0 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,6 @@
cd cce-network-v2

# 生产镜像发布
export EXTRA_GO_BUILD_FLAGS=-gcflags=-trimpath=$GOPATH/src
make docker PROFILE=pro PUSH_IMAGE_FLAGS=--push
make docker-arm GOARCH=arm64 PROFILE=pro PUSH_IMAGE_FLAGS=--push
2 changes: 1 addition & 1 deletion cce-network-v2/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.12.1
2.12.2
5 changes: 5 additions & 0 deletions cce-network-v2/docs/release.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,11 @@ v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如
2. 增加 eni 安全组同步功能, 保持CCE ENI 和节点安全组同步。
3. 增加节点网络配置集功能 NetResourceConfigSet,支持指定节点独立配置网络资源。

#### 2.12.2 [2024/07/09]
1. [Feature] 支持borrowed subnet 可观测,新增 cce_subnet_ips_guage 指标代表子网可用 IP 地址数量
2. [Optimize] borrowed subnet 支持定时同步能力,避免因单次 IP 计算错误,导致错误借用未归还的问题。
3. [Optimize] 更新子网可用 IP 借用语义,单个 ENI 从子网借用 IP 地址数以最新一次为准

#### 2.12.1 [2024/07/02]
1. [Bug] 修复 bbc 机型开启 burstable ENI 时,初始化会导致空指针的问题
2. [Bug] 修复 bbc ENI 不返回实例 id 时,无法选中 ENI 的,影响节点就绪时间的问题
Expand Down
139 changes: 110 additions & 29 deletions cce-network-v2/pkg/bce/bcesync/borrowed_subnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,10 @@ import (
ccev1 "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s/apis/cce.baidubce.com/v1"
ccev2 "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/k8s/apis/cce.baidubce.com/v2"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/lock"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/logging/logfields"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/math"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/metrics"
"github.com/sirupsen/logrus"
"k8s.io/apimachinery/pkg/labels"
)

Expand All @@ -17,6 +20,16 @@ var (
once sync.Once
)

// Label values used with metrics.SubnetIPsGuage.
const (
// IPKindAvailable is the kind label for a subnet's Status.AvailableIPNum.
IPKindAvailable = "available"
// IPKindBorrowed is the kind label for borrowed IP counts: per ENI it is the
// value of GetBorrowedIPNum, per subnet it is BorrowedIPsCount.
IPKindBorrowed = "borrowed"
// IPKindBorrowedAvail is the kind label for a subnet's BorrowedAvailableIPsCount.
IPKindBorrowedAvail = "borrowed_avail"
// IPKindCount is not referenced in the visible code.
// NOTE(review): presumably a total-IP-count kind; confirm against its users.
IPKindCount = "count"
// IPKindUsed is the kind label for the number of an ENI's private IPs that
// belong to the ENI's own subnet.
IPKindUsed = "used"

// EniTypeSubnet is the placeholder written into the ENI-id and owner labels
// for series that describe a whole subnet rather than a single ENI.
EniTypeSubnet = "subnet"
)

// bsm is a global map of borrowed subnets
type bsm struct {
mutex *lock.RWMutex
Expand All @@ -27,18 +40,20 @@ func InitBSM() error {
once.Do(func() {
globalBSM = &bsm{mutex: &lock.RWMutex{}, subnets: make(map[string]*BorrowedSubnet)}
})
return restoreBSM()
return resyncBSM()
}

// restore bsm when restarting
func restoreBSM() error {
func resyncBSM() error {
// restore all subnets
sbns, err := k8s.CCEClient().Informers.Cce().V1().Subnets().Lister().List(labels.Everything())
if err != nil {
return fmt.Errorf("bsm failed to list subnets: %v", err)
}

var subnets = make(map[string]*BorrowedSubnet)
for _, sbn := range sbns {
globalBSM.subnets[sbn.Spec.ID] = NewBorrowedSubnet(sbn)
subnets[sbn.Spec.ID] = NewBorrowedSubnet(sbn)
}

// restore all tasks
Expand All @@ -47,24 +62,48 @@ func restoreBSM() error {
return fmt.Errorf("bsm failed to list enis: %v", err)
}
for _, eni := range enis {
bsbn, err := globalBSM.GetSubnet(eni.Spec.SubnetID)
if err != nil {
log.WithField("task", "restoreBSM").Errorf("bsm failed to get subnet %q: %v", eni.Spec.SubnetID, err)
continue
bsbn, ok := subnets[eni.Spec.SubnetID]
if !ok {
sbn, err := GlobalBSM().GetSubnet(eni.Spec.SubnetID)
if err != nil {
log.WithField("task", "restoreBSM").Errorf("bsm failed to get subnet %q: %v", eni.Spec.SubnetID, err)
continue
}
bsbn = sbn
subnets[sbn.Spec.ID] = bsbn
}

// should borrow cross subnet IP
if eni.Spec.BorrowIPCount > 0 {
if eni.Spec.BorrowIPCount-eni.Status.LendBorrowedIPCount > 0 {
borrowed := bsbn.Borrow(eni.Spec.ID, eni.Spec.BorrowIPCount-eni.Status.LendBorrowedIPCount)
if borrowed == 0 {
ipCount := math.IntMax(eni.Spec.BorrowIPCount-len(eni.Spec.PrivateIPSet), 0)
if ipCount > 0 {
count := bsbn.forceBorrowForENI(eni.Spec.ID, ipCount)
if count != ipCount {
metrics.IPAMErrorCounter.WithLabelValues(ccev2.ErrorCodeNoAvailableSubnetCreateENI, "ENI", eni.Spec.ID).Inc()
}
}
}

var usedIPCount int
for _, ipset := range eni.Spec.PrivateIPSet {
if ipset.SubnetID == eni.Spec.SubnetID {
usedIPCount++
}
}
metrics.SubnetIPsGuage.WithLabelValues(IPKindBorrowed, eni.Spec.ID, eni.Spec.SubnetID, eni.Spec.NodeName).Set(float64(bsbn.GetBorrowedIPNum(eni.Spec.ID)))
metrics.SubnetIPsGuage.WithLabelValues(IPKindUsed, eni.Spec.ID, eni.Spec.SubnetID, eni.Spec.NodeName).Set(float64(usedIPCount))
}

log.WithField("task", "restoreBSM").Info("bsm restored successfully")
for id, bs := range subnets {
metrics.SubnetIPsGuage.WithLabelValues(IPKindAvailable, EniTypeSubnet, id, EniTypeSubnet).Set(float64(bs.Status.AvailableIPNum))
metrics.SubnetIPsGuage.WithLabelValues(IPKindBorrowed, EniTypeSubnet, id, EniTypeSubnet).Set(float64(bs.BorrowedIPsCount))
metrics.SubnetIPsGuage.WithLabelValues(IPKindBorrowedAvail, EniTypeSubnet, id, EniTypeSubnet).Set(float64(bs.BorrowedAvailableIPsCount))
}
GlobalBSM().mutex.Lock()
GlobalBSM().subnets = subnets
GlobalBSM().mutex.Unlock()

log.WithField("task", "restoreBSM").Debug("resync bsm successfully")
return nil
}

Expand Down Expand Up @@ -96,6 +135,16 @@ func (bsm *bsm) EnsureSubnet(vpcID, sbnID string) (*BorrowedSubnet, error) {
return bsm.GetSubnet(sbnID)
}

// ForceBorrowForENI refreshes the borrow reservation that eni holds on its
// subnet. It does nothing when eni is nil or when it requests no borrowed IPs.
//
// The reserved amount is Spec.BorrowIPCount minus the number of entries in
// Spec.PrivateIPSet, floored at zero. The reservation is keyed by the ENI
// object name. If the subnet cannot be ensured, the call is a silent no-op
// (best effort).
func (bsm *bsm) ForceBorrowForENI(eni *ccev2.ENI) {
	if eni == nil || eni.Spec.BorrowIPCount <= 0 {
		return
	}

	subnet, err := bsm.EnsureSubnet(eni.Spec.ENI.VpcID, eni.Spec.ENI.SubnetID)
	if err != nil {
		return
	}

	// IPs already attached to the ENI no longer need a reservation.
	outstanding := math.IntMax(eni.Spec.BorrowIPCount-len(eni.Spec.PrivateIPSet), 0)
	subnet.forceBorrowForENI(eni.Name, outstanding)
}

// updateSubnet update subnet in bsm
func (bsm *bsm) updateSubnet(sbn *ccev1.Subnet) {
bs, err := bsm.GetSubnet(sbn.Name)
Expand Down Expand Up @@ -127,6 +176,16 @@ func NewBorrowedSubnet(subnet *ccev1.Subnet) *BorrowedSubnet {
}
}

// logger returns a log entry tagged with this subnet's ID and a snapshot of
// its borrow counters. It reads the counters without taking bs.mutex itself;
// the callers visible in this file invoke it while already holding the lock.
func (bs *BorrowedSubnet) logger() *logrus.Entry {
	snapshot := logrus.Fields{}
	snapshot["module"] = "borrowedSubnet"
	snapshot["sbnID"] = bs.SubnetId
	snapshot["BorrowedIPsCount"] = bs.BorrowedIPsCount
	snapshot["BorrowedAvailableIPsCount"] = bs.BorrowedAvailableIPsCount
	snapshot["AvailableIPNum"] = bs.Status.AvailableIPNum
	return log.WithFields(snapshot)
}

func (bs *BorrowedSubnet) update(subnet *ccev1.Subnet) {
bs.mutex.Lock()
defer bs.mutex.Unlock()
Expand All @@ -138,39 +197,61 @@ func (bs *BorrowedSubnet) Borrow(enid string, ipNum int) (borrowedIPNum int) {
bs.mutex.Lock()
defer bs.mutex.Unlock()
if bs.BorrowedAvailableIPsCount < ipNum {
bs.logger().WithFields(logrus.Fields{
"task": "borrow",
"eniID": enid,
"needIPNum": ipNum,
"tasks": logfields.Json(bs.tasks),
}).Warning("subnet not enough available ips to borrow by eni")
return
}

if task, ok := bs.tasks[enid]; !ok {
bs.tasks[enid] = IPBorrowTask{SubnetId: bs.SubnetId, EniID: enid, IPNum: ipNum}
} else {
task.IPNum += ipNum
bs.tasks[enid] = task
}

bs.BorrowedIPsCount += ipNum
bs.BorrowedAvailableIPsCount = bs.Status.AvailableIPNum - bs.BorrowedIPsCount

return ipNum
return bs._forceBorrowForENI(enid, ipNum)
}

func (bs *BorrowedSubnet) forceBorrowForENI(enid string, ipNum int) {
if ipNum <= 0 {
return
}
// forceBorrowForENI sets the borrow reservation of ENI enid on this subnet to
// ipNum, replacing any previous reservation for that ENI instead of adding to it.
// It takes bs.mutex for the duration of the call and delegates the bookkeeping
// to _forceBorrowForENI.
//
// It returns the number of IPs actually reserved, which is less than ipNum
// when the subnet does not have enough unreserved IPs.
func (bs *BorrowedSubnet) forceBorrowForENI(enid string, ipNum int) int {
bs.mutex.Lock()
defer bs.mutex.Unlock()

return bs._forceBorrowForENI(enid, ipNum)
}

// _forceBorrowForENI sets the borrow reservation of ENI enid to at most ipNum
// and returns the number of IPs actually reserved. The caller must hold
// bs.mutex.
//
// The latest request wins: any previous reservation held by the same ENI is
// released first, so repeated calls replace the reservation instead of
// accumulating. The granted amount is capped by the IPs that remain
// unreserved in the subnet and is never negative, even when the subnet is
// already over-committed or ipNum is negative.
func (bs *BorrowedSubnet) _forceBorrowForENI(enid string, ipNum int) int {
	task, ok := bs.tasks[enid]
	if ok {
		// Give back the previous reservation before computing the new one.
		bs.BorrowedIPsCount -= task.IPNum
	} else {
		task = IPBorrowTask{SubnetId: bs.SubnetId, EniID: enid}
	}

	// IPs that are still free once every other ENI's reservation is honoured.
	sbnAvailBorrowIP := bs.Status.AvailableIPNum - bs.BorrowedIPsCount
	// Clamp at zero so an over-committed subnet cannot yield a negative
	// reservation that would corrupt BorrowedIPsCount.
	eniBorrowedIPNum := math.IntMax(math.IntMin(sbnAvailBorrowIP, ipNum), 0)

	task.IPNum = eniBorrowedIPNum
	bs.tasks[enid] = task

	// Account for the granted amount exactly once.
	bs.BorrowedIPsCount += eniBorrowedIPNum
	bs.BorrowedAvailableIPsCount = bs.Status.AvailableIPNum - bs.BorrowedIPsCount

	if eniBorrowedIPNum < ipNum {
		bs.logger().WithFields(logrus.Fields{
			"task":                            "forceBorrowForENI",
			"eniID":                           enid,
			"sbnID":                           bs.SubnetId,
			"needIPNum":                       ipNum,
			"eniBorrowedIPNum":                eniBorrowedIPNum,
			"subnetBorrowedAvailableIPsCount": bs.BorrowedAvailableIPsCount,
			"subnetAvailableIPsCount":         bs.Status.AvailableIPNum,
			"subnetBorrowedIPNum":             bs.BorrowedIPsCount,
		}).Warning("not enough available ips to force borrow")
	}

	return eniBorrowedIPNum
}

func (bs *BorrowedSubnet) Done(enid string, ipNum int) {
Expand Down
4 changes: 2 additions & 2 deletions cce-network-v2/pkg/bce/bcesync/borrowed_subnet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ func TestBorrowedSubnet_Borrow(t *testing.T) {

num = bs.Borrow("eni1", 2)
assert.Equal(t, 2, num)
assert.Equal(t, 252, bs.BorrowedAvailableIPsCount)
assert.Equal(t, 3, bs.BorrowedIPsCount)
assert.Equal(t, 253, bs.BorrowedAvailableIPsCount)
assert.Equal(t, 2, bs.BorrowedIPsCount)
assert.Equal(t, 255, bs.Status.AvailableIPNum)

bs.forceBorrowForENI("eni1", 10)
Expand Down
7 changes: 1 addition & 6 deletions cce-network-v2/pkg/bce/bcesync/eni.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,12 +307,7 @@ func (es *eniSyncher) handleENIUpdate(resource *ccev2.ENI, scopeLog *logrus.Entr
)
defer func() {
if err == nil {
if newObj.Spec.BorrowIPCount > 0 {
bsbn, e := GlobalBSM().EnsureSubnet(newObj.Spec.ENI.VpcID, newObj.Spec.ENI.SubnetID)
if e == nil {
bsbn.forceBorrowForENI(newObj.Name, newObj.Spec.BorrowIPCount-newObj.Status.LendBorrowedIPCount)
}
}
GlobalBSM().ForceBorrowForENI(newObj)
}
}()

Expand Down
1 change: 1 addition & 0 deletions cce-network-v2/pkg/bce/bcesync/subnet.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ func (ss *subnetSyncher) syncSubnet(ctx context.Context) (result []vpc.Subnet, e
}
result = append(result, *sbn)
}
resyncBSM()
return
}

Expand Down
2 changes: 1 addition & 1 deletion cce-network-v2/pkg/bce/vpceni/node_bcc.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ func (n *bccNode) __prepareIPAllocation(scopedLog *logrus.Entry, checkSubnet boo

if subnet, err := bcesync.GlobalBSM().GetSubnet(e.Spec.ENI.SubnetID); err == nil {
if subnet.GetBorrowedIPNum(interfaceID) > 0 {
a.AvailableForAllocationIPv4 = math.IntMin(subnet.BorrowedAvailableIPsCount, availableIPv4OnENI)
a.AvailableForAllocationIPv4 = math.IntMin(subnet.GetBorrowedIPNum(interfaceID), availableIPv4OnENI)
} else {
a.AvailableForAllocationIPv4 = math.IntMin(subnet.BorrowedAvailableIPsCount, availableIPv4OnENI)
}
Expand Down
14 changes: 14 additions & 0 deletions cce-network-v2/pkg/metrics/metrics.go
Original file line number Diff line number Diff line change
Expand Up @@ -353,6 +353,9 @@ var (

// NoAvailableSubnetNodeCount is the counter of nodes that no avaiable subnet to create new eni
IPAMErrorCounter = NoOpCounterVec

// SubnetIPsGuage is the gauge of available IPs in subnet and borrowed IPs by eni
SubnetIPsGuage = NoOpGaugeVec
)

type Configuration struct {
Expand Down Expand Up @@ -381,6 +384,7 @@ type Configuration struct {
KubernetesAPICallsEnabled bool
KubernetesCNPStatusCompletionEnabled bool
IpamEventEnabled bool
SubnetIPsGuageEnabled bool

VersionMetric bool
APILimiterProcessHistoryDuration bool
Expand Down Expand Up @@ -423,6 +427,7 @@ func DefaultMetrics() map[string]struct{} {
Namespace + "_work_queue_event_counter": {},
Namespace + "_controller_handler_duration_milliseconds": {},
Namespace + "_ipam_error_counter": {},
Namespace + "_subnet_ips_guage": {},
}
}

Expand Down Expand Up @@ -592,6 +597,15 @@ func CreateConfiguration(metricsEnabled []string) (Configuration, []prometheus.C
collectors = append(collectors, IpamEvent)
c.IpamEventEnabled = true

case Namespace + "_subnet_ips_guage":
SubnetIPsGuage = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: Namespace,
Name: "subnet_ips_guage",
Help: "Number of IP addresses in a subnet labeled by eni and subnet id",
}, []string{LabelKind, "eniid", "sbnid", "owner"})
collectors = append(collectors, SubnetIPsGuage)
c.SubnetIPsGuageEnabled = true

case Namespace + "_version":
VersionMetric = prometheus.NewGaugeVec(prometheus.GaugeOpts{
Namespace: Namespace,
Expand Down

0 comments on commit 08bf1c2

Please sign in to comment.