Skip to content

Commit

Permalink
Release cce-network-v2/2.8.7
Browse files Browse the repository at this point in the history
  • Loading branch information
gola committed Feb 9, 2024
1 parent 6fcf30a commit 521f6f9
Show file tree
Hide file tree
Showing 21 changed files with 167 additions and 102 deletions.
2 changes: 1 addition & 1 deletion cce-network-v2/GO_VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
go version go1.21.0 linux/amd64
go version go1.21.4 linux/amd64
2 changes: 1 addition & 1 deletion cce-network-v2/VERSION
Original file line number Diff line number Diff line change
@@ -1 +1 @@
2.8.6
2.8.7
8 changes: 7 additions & 1 deletion cce-network-v2/docs/release.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@
v2 版本新架构,支持VPC-ENI 辅助IP和vpc路由。版本发布历史如下:

### 2.8 (2023/08/07)
#### 2.8.6
#### 2.8.7 [暂未发布]
1. [BUG] 修复 cce-network-v2-config 中 --bce-customer-max-eni 及 --bce-customer-max-ip 参数配置不生效
#### 2.8.6 [20231110]
1. [BUG] 修复 EndpointManager 在更新 endpoint 对象时没有超时机制,且在资源过期等情况下会陷入死循环的问题
2. [optimize] 优化 operator 工作队列,支持自定义 worker 数量,加速事件处理
3. [optimize] EndpointManager 核心工作流日志,把关键流程日志修改为 info 级别
4. [optimize] 优化EndpointManager gc工作流,动态 IP 分配的 gc 时间设置为一周
5. [optimize] 增加 ENI VPC 状态机流转时没有触发状态变更时重新入队时间,加速 ENI 就绪时间
6. [optimize] 增加增删 ENI 状态变更事件,增加 ENI 的 VPC 非终态日志记录
7. [optimize] 缺少 metaapi 时,记录相关事件
8. [optimize] 当VPC路由满,记录相关事件

#### 2.8.5 [20231017]
1. [优化] 优化了 psts 分配 IP 时失败的回收机制,避免出现 IP 泄露
Expand Down
1 change: 0 additions & 1 deletion cce-network-v2/operator/flags.go
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,6 @@ func init() {
flags.Duration(pkgOption.FixedIPTimeout, defaults.CCEEndpointGCInterval, "Timeout for waiting for the fixed IP assignment to succeed")
option.BindEnv(pkgOption.FixedIPTimeout)


flags.Bool(operatorOption.EnableRemoteFixedIPGC, true, "gc remote fixed ip when endpoint have been deleted")
option.BindEnv(operatorOption.EnableRemoteFixedIPGC)

Expand Down
12 changes: 6 additions & 6 deletions cce-network-v2/operator/option/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,14 +159,14 @@ const (
// BCECloudVPCID allows user to specify the vpc
BCECloudVPCID = "bce-cloud-vpc-id"
// BCECloudHost host of iaas api
BCECloudHost = "bce-cloud-host"
BCECloudRegion = "bce-cloud-region"
BCECloudContry = "bce-cloud-country"
BCECloudAccessKey = "bce-cloud-access-key"
BCECloudSecureKey = "bce-cloud-secure-key"
BCECloudHost = "bce-cloud-host"
BCECloudRegion = "bce-cloud-region"
BCECloudContry = "bce-cloud-country"
BCECloudAccessKey = "bce-cloud-access-key"
BCECloudSecureKey = "bce-cloud-secure-key"

ResourceENIResyncInterval = "resource-eni-resync-interval"
ResourceResyncWorkers = "resource-resync-workers"
ResourceResyncWorkers = "resource-resync-workers"

// BCECustomerMaxENI is the max eni number of customer
BCECustomerMaxENI = "bce-customer-max-eni"
Expand Down
4 changes: 2 additions & 2 deletions cce-network-v2/operator/watchers/cce_eni.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ func StartSynchronizingENI(ctx context.Context, eniManager syncer.ENIEventHandle

enisLister := k8s.CCEClient().Informers.Cce().V2().ENIs().Lister()

var endpointManagerSyncHandler = func(key string) error {
var eniSyncHandler = func(key string) error {
obj, err := enisLister.Get(key)

// Delete handling
Expand All @@ -59,7 +59,7 @@ func StartSynchronizingENI(ctx context.Context, eniManager syncer.ENIEventHandle
resyncPeriod := eniManager.ResyncENI(ctx)
controller := cm.NewResyncController("cce-eni-controller", int(operatorOption.Config.ResourceResyncWorkers),
k8s.CCEClient().Informers.Cce().V2().ENIs().Informer(),
endpointManagerSyncHandler)
eniSyncHandler)
controller.RunWithResync(resyncPeriod)
return nil
}
Expand Down
2 changes: 1 addition & 1 deletion cce-network-v2/pkg/bce/agent/eni_provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ func (eh *eniInitFactory) OnUpdateENI(oldObj, newObj *ccev2.ENI) error {
}
}
}

// set device and route on the worker machine only when eni bound at bcc
if _, ok := eh.localENIs[resource.Spec.ENI.ID]; !ok {
resource.Status.InterfaceIndex = eniLink.linkIndex
Expand Down
12 changes: 10 additions & 2 deletions cce-network-v2/pkg/bce/api/cloud/error.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const (
ErrorReasonRouteRuleRepeated ErrorReason = "RouteRuleRepeated"
ErrorReasonQuotaLimitExceeded ErrorReason = "QuotaLimitExceeded"
ErrorReasonNoSuchObject ErrorReason = "NoSuchObject"
ErrorReasonNoSRouteRuleExceedQuota ErrorReason = "RouteRuleExceedQuota"
)

func ReasonForError(err error) ErrorReason {
Expand All @@ -63,10 +64,12 @@ func ReasonForError(err error) ErrorReason {
return ErrorReasonPrivateIPInUse
case caseInsensitiveContains(errMsg, "RouteRuleRepeated"):
return ErrorReasonRouteRuleRepeated
case caseInsensitiveContains(errMsg, "QuotaLimitExceeded"):
case caseInsensitiveContains(errMsg, "QuotaLimitExceeded"), caseInsensitiveContains(errMsg, "LimitExceeded"):
return ErrorReasonQuotaLimitExceeded
case caseInsensitiveContains(errMsg, "ErrorReasonNoSuchObject"):
return ErrorReasonNoSuchObject
case caseInsensitiveContains(errMsg, "RouteRuleExceedQuota"):
return ErrorReasonNoSRouteRuleExceedQuota
}
}
return ErrorReasonUnknown
Expand Down Expand Up @@ -114,7 +117,12 @@ func IsErrorRouteRuleRepeated(err error) bool {
}

// IsErrorQuotaLimitExceeded reports whether err is classified by
// ReasonForError as ErrorReasonQuotaLimitExceeded.
//
// The check must not call IsErrorQuotaLimitExceeded itself: a self-call as a
// fallback never terminates when the reason is anything other than
// ErrorReasonQuotaLimitExceeded.
func IsErrorQuotaLimitExceeded(err error) bool {
	return ReasonForError(err) == ErrorReasonQuotaLimitExceeded
}

// IsErrorCreateRouteRuleExceededQuota reports whether err is classified by
// ReasonForError as ErrorReasonNoSRouteRuleExceedQuota.
func IsErrorCreateRouteRuleExceededQuota(err error) bool {
	reason := ReasonForError(err)
	return reason == ErrorReasonNoSRouteRuleExceedQuota
}

func caseInsensitiveContains(s, substr string) bool {
Expand Down
3 changes: 3 additions & 0 deletions cce-network-v2/pkg/bce/bcesync/bbc_eni.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/tools/record"
)

var (
Expand All @@ -38,6 +39,8 @@ type bbcENISyncer struct {
bceclient cloud.Interface
resyncPeriod time.Duration
enilister enilisterv2.ENILister

eventRecorder record.EventRecorder
}

// Init initialise the sync manager.
Expand Down
106 changes: 64 additions & 42 deletions cce-network-v2/pkg/bce/bcesync/eni.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,14 @@ import (
"reflect"
"time"

"github.com/sirupsen/logrus"
corev1 "k8s.io/api/core/v1"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/client-go/kubernetes/scheme"
"k8s.io/client-go/tools/record"

"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/api/v1/models"
operatorOption "github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/operator/option"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/bce/api/cloud"
Expand All @@ -19,22 +27,18 @@ import (
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/logging/logfields"
"github.com/baidubce/baiducloud-cce-cni-driver/cce-network-v2/pkg/syncer"
enisdk "github.com/baidubce/bce-sdk-go/services/eni"
"github.com/sirupsen/logrus"
kerrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
)

const (
eniControllerName = "eni-sync-manager"

ENIReadyTimeToAttach = 5 * time.Second
ENIMaxCreateDuration = 10 * time.Minute
ENIMaxCreateDuration = 5 * time.Minute

FinalizerENI = "eni-syncer"
)

var eniLog = logging.NewSubysLogger("bce-sync-manager")
var eniLog = logging.NewSubysLogger(eniControllerName)

// VPCENISyncer only work with single vpc cluster
type VPCENISyncer struct {
Expand All @@ -44,15 +48,18 @@ type VPCENISyncer struct {

// Init creates the shared event recorder and initialises the ENI and BBC ENI syncers
func (es *VPCENISyncer) Init(ctx context.Context) error {
es.eni = &eniSyncher{}
eventRecorder := k8s.EventBroadcaster().NewRecorder(scheme.Scheme, corev1.EventSource{Component: eniControllerName})

es.eni = &eniSyncher{eventRecorder: eventRecorder}
es.eni.VPCIDs = append(es.eni.VPCIDs, operatorOption.Config.BCECloudVPCID)
es.eni.ClusterID = operatorOption.Config.CCEClusterID
err := es.eni.Init(ctx)
if err != nil {
return err
}

es.bbceni = &bbcENISyncer{}
es.bbceni = &bbcENISyncer{eventRecorder: eventRecorder}

return es.bbceni.Init(ctx)
}

Expand Down Expand Up @@ -104,6 +111,8 @@ type eniSyncher struct {
updater syncer.ENIUpdater
bceclient cloud.Interface
resyncPeriod time.Duration

eventRecorder record.EventRecorder
}

// Init initialise the sync manager.
Expand Down Expand Up @@ -226,10 +235,10 @@ func (ss *eniSyncher) createExternalENI(eni *enisdk.Eni) {
}
_, err = ss.updater.Create(newENI)
if err != nil {
scopeLog.WithError(err).Errorf("create external eni failed")
ss.eventRecorder.Eventf(resource, corev1.EventTypeWarning, "FailedCreateExternalENI", "Failed to create external ENI on nrs %s: %s", resource.Name, err)
return
}
scopeLog.Infof("create external eni success")
ss.eventRecorder.Eventf(resource, corev1.EventTypeNormal, "CreateExternalENISuccess", "create external ENI %s on nrs %s success", eni.EniId, resource.Name)
}

// Create Process synchronization of new enis
Expand All @@ -246,10 +255,11 @@ func (ss *eniSyncher) Update(resource *ccev2.ENI) error {
return nil
}
var (
newObj = resource.DeepCopy()
eniStatus *ccev2.ENIStatus
err error
ctx = logfields.NewContext()
newObj = resource.DeepCopy()
eniStatus *ccev2.ENIStatus
err error
updateError error
ctx = logfields.NewContext()
)

scopeLog := eniLog.WithFields(logrus.Fields{
Expand All @@ -272,9 +282,22 @@ func (ss *eniSyncher) Update(resource *ccev2.ENI) error {
}

err = machine.start()
if _, ok := err.(*cm.DelayEvent); err != nil && !ok {
scopeLog.WithError(err).Error("eni machine failed")
return err
_, isDelayError := err.(*cm.DelayEvent)
if err != nil {
if isDelayError {
// if vpc status is not changed, will retry after 5s
if newObj.Status.VPCStatus == resource.Status.VPCStatus {
scopeLog.Infof("eni vpc status not changed, will retry later")
return err
} else {
// if vpc status is changed, will update status on apiserver
goto updateAPIServer
}
} else {
scopeLog.WithError(err).Error("eni machine failed")
return err
}

}

scopeLog.Debug("start refresh eni")
Expand All @@ -285,27 +308,28 @@ func (ss *eniSyncher) Update(resource *ccev2.ENI) error {
}
}

updateAPIServer:
eniStatus = &newObj.Status
// update spec and status
if !reflect.DeepEqual(&newObj.Spec, &resource.Spec) ||
!reflect.DeepEqual(newObj.Labels, resource.Labels) ||
!reflect.DeepEqual(newObj.Finalizers, resource.Finalizers) {
scopeLog.Debug("start update eni spec")
newObj, err = ss.updater.Update(newObj)
if err != nil {
scopeLog.WithError(err).Error("update eni spec failed")
return err
newObj, updateError = ss.updater.Update(newObj)
if updateError != nil {
scopeLog.WithError(updateError).Error("update eni spec failed")
return updateError
}
scopeLog.Info("update eni spec success")
}

if !reflect.DeepEqual(eniStatus, &resource.Status) {
newObj.Status = *eniStatus
scopeLog.Debug("start update eni status")
_, err = ss.updater.UpdateStatus(newObj)
if err != nil {
scopeLog.WithError(err).Error("update eni status failed")
return err
_, updateError = ss.updater.UpdateStatus(newObj)
if updateError != nil {
scopeLog.WithError(updateError).Error("update eni status failed")
return updateError
}
scopeLog.Info("update eni status success")
}
Expand Down Expand Up @@ -448,17 +472,19 @@ type eniStateMachine struct {
ss *eniSyncher
ctx context.Context
resource *ccev2.ENI
vpceni *eni.Eni
}

// Start state machine flow
func (esm *eniStateMachine) start() error {
var err error
if esm.resource.Status.VPCStatus != ccev2.VPCENIStatusInuse {

// refresh status of ENI
esm.vpceni, err = esm.ss.statENI(esm.ctx, esm.resource.Name)
if err != nil {
return fmt.Errorf("eni state machine failed to refresh eni(%s) status: %v", esm.resource.Name, err)
}
switch esm.resource.Status.VPCStatus {
case ccev2.VPCENIStatusNone:
// refresh status of ENI
_, err = esm.ss.statENI(esm.ctx, esm.resource.Name)
case ccev2.VPCENIStatusAvailable:
err = esm.attachENI()
case ccev2.VPCENIStatusAttaching:
Expand All @@ -475,6 +501,7 @@ func (esm *eniStateMachine) start() error {
return err
}

(&esm.resource.Status).AppendVPCStatus(ccev2.VPCENIStatus(esm.vpceni.Status))
// not the final status, will retry later
return cm.NewDelayEvent(esm.resource.Name, ENIReadyTimeToAttach, fmt.Sprintf("eni %s status is not final: %s", esm.resource.Spec.ENI.ID, esm.resource.Status.VPCStatus))
}
Expand All @@ -484,12 +511,8 @@ func (esm *eniStateMachine) start() error {
// attachENI attach a ENI to instance
// Only accept calls whose ENI status is "available"
func (esm *eniStateMachine) attachENI() error {
eniCache, err := esm.ss.statENI(esm.ctx, esm.resource.Spec.ENI.ID)
if err != nil {
return err
}
// status is not match
if eniCache.Status != string(ccev2.VPCENIStatusAvailable) {
if esm.vpceni.Status != string(ccev2.VPCENIStatusAvailable) {
return nil
}

Expand All @@ -499,14 +522,15 @@ func (esm *eniStateMachine) attachENI() error {
}

// try to attach eni to bcc instance
err = esm.ss.bceclient.AttachENI(esm.ctx, &enisdk.EniInstance{
err := esm.ss.bceclient.AttachENI(esm.ctx, &enisdk.EniInstance{
InstanceId: esm.resource.Spec.ENI.InstanceID,
EniId: esm.resource.Spec.ENI.ID,
})
if err != nil {
esm.ss.eventRecorder.Eventf(esm.resource, corev1.EventTypeWarning, "AttachENIFailed", "failed attach eni(%s) to %s, will delete it: %v", esm.resource.Spec.ENI.ID, esm.resource.Spec.ENI.InstanceID, err)

err2 := esm.deleteENI()
err = fmt.Errorf("failed to attach eni(%s) to instance(%s): %s, delete eni crd: %s", esm.resource.Spec.ENI.ID, esm.resource.Spec.ENI.InstanceID, err.Error(), err2.Error())

return err
}

Expand All @@ -520,9 +544,10 @@ func (esm *eniStateMachine) attachENI() error {
func (esm *eniStateMachine) deleteENI() error {
err := esm.ss.bceclient.DeleteENI(esm.ctx, esm.resource.Spec.ENI.ID)
if err != nil {
esm.ss.eventRecorder.Eventf(esm.resource, corev1.EventTypeWarning, "DeleteENIFailed", "failed to delete eni(%s): %v", esm.resource.Spec.ENI.ID, err)
return fmt.Errorf("failed to delete eni(%s): %s", esm.resource.Spec.ENI.ID, err.Error())
}

esm.ss.eventRecorder.Eventf(esm.resource, corev1.EventTypeWarning, "DeleteENISuccess", "delete eni(%s) success", esm.resource.Spec.ENI.ID)
// delete resource after delete eni in cloud
err = esm.ss.updater.Delete(esm.resource.Name)
if err != nil {
Expand All @@ -535,16 +560,13 @@ func (esm *eniStateMachine) deleteENI() error {
// attachingENI Processing ENI in the attaching state
// ENI may be stuck in the attaching state for a long time and need to be manually deleted
func (esm *eniStateMachine) attachingENI() error {
eniCache, err := esm.ss.statENI(esm.ctx, esm.resource.Spec.ENI.ID)
if err != nil {
return err
}
// status is not match
if eniCache.Status != string(ccev2.VPCENIStatusAttaching) {
if esm.vpceni.Status != string(ccev2.VPCENIStatusAttaching) {
return nil
}

if esm.resource.CreationTimestamp.Add(ENIMaxCreateDuration).Before(time.Now()) {
esm.ss.eventRecorder.Eventf(esm.resource, corev1.EventTypeWarning, "AttachingENIError", "eni(%s) is in attaching status more than %s, will delete it", esm.resource.Spec.ENI.ID, ENIMaxCreateDuration.String())
return esm.deleteENI()
}
return nil
Expand Down
Loading

0 comments on commit 521f6f9

Please sign in to comment.