From baa904f6518fd95a3f3ab954499f92e72ea5699e Mon Sep 17 00:00:00 2001 From: Valentin Knabel Date: Wed, 11 Sep 2024 11:24:16 +0200 Subject: [PATCH 1/4] chore(deps): bump metal-lib (#569) --- go.mod | 2 +- go.sum | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/go.mod b/go.mod index b738718f..eaa3fd8f 100644 --- a/go.mod +++ b/go.mod @@ -20,7 +20,7 @@ require ( github.com/looplab/fsm v1.0.2 github.com/metal-stack/go-ipam v1.14.5 github.com/metal-stack/masterdata-api v0.11.4 - github.com/metal-stack/metal-lib v0.18.1 + github.com/metal-stack/metal-lib v0.18.2 github.com/metal-stack/security v0.8.1 github.com/metal-stack/v v1.0.3 github.com/nsqio/go-nsq v1.1.0 diff --git a/go.sum b/go.sum index 1fb0b970..1b9ddb4c 100644 --- a/go.sum +++ b/go.sum @@ -301,8 +301,8 @@ github.com/metal-stack/go-ipam v1.14.5 h1:KSnftPoySufz/SSbAmtCqo/HzmlYuyVMSfMi53 github.com/metal-stack/go-ipam v1.14.5/go.mod h1:K/ax3O8oPYIClpEpSLmu0a2NfKM/9qNrNLa05cdYndY= github.com/metal-stack/masterdata-api v0.11.4 h1:bgRk7PbD5BjYbmAReaV7gTKKKrW5x/ZzCwj98VSWoJk= github.com/metal-stack/masterdata-api v0.11.4/go.mod h1:fD0AtsoNNaOLqRMBeZzDFljiQW9RlrOnxeZ20Pqhxas= -github.com/metal-stack/metal-lib v0.18.1 h1:Kjmf/Z/6pWemR8O6ttbNPQ9PjeT3ON60sBNu51Lgi1M= -github.com/metal-stack/metal-lib v0.18.1/go.mod h1:GJjipRpHmpd2vjBtsaw9gGk5ZFan7NlShyjIsTdY1x4= +github.com/metal-stack/metal-lib v0.18.2 h1:EAmZkZeKpenAvxZRSKsA6gj9Jd8XLR6Z0/QhABFCCDE= +github.com/metal-stack/metal-lib v0.18.2/go.mod h1:GJjipRpHmpd2vjBtsaw9gGk5ZFan7NlShyjIsTdY1x4= github.com/metal-stack/security v0.8.1 h1:4zmVUxZvDWShVvVIxM3XhIv7pTmPe9DvACRIHW6YTsk= github.com/metal-stack/security v0.8.1/go.mod h1:OO8ZilZO6fUV5QEmwc7HP/RAjqYrGQxXoYIddJ9TvqE= github.com/metal-stack/v v1.0.3 h1:Sh2oBlnxrCUD+mVpzfC8HiqL045YWkxs0gpTvkjppqs= From 732b6caf171286227c5642f878bde19c3d10d16f Mon Sep 17 00:00:00 2001 From: Gerrit Date: Fri, 13 Sep 2024 11:55:32 +0200 Subject: [PATCH 2/4] Take amount of reservations into account for free 
machine count. (#538) --- cmd/metal-api/internal/issues/types.go | 12 - .../internal/service/partition-service.go | 36 +- .../service/partition-service_test.go | 499 ++++++++++++++++-- .../internal/service/v1/partition.go | 45 +- spec/metal-api.json | 42 +- 5 files changed, 538 insertions(+), 96 deletions(-) diff --git a/cmd/metal-api/internal/issues/types.go b/cmd/metal-api/internal/issues/types.go index db05f05b..2ff13ad0 100644 --- a/cmd/metal-api/internal/issues/types.go +++ b/cmd/metal-api/internal/issues/types.go @@ -24,18 +24,6 @@ func AllIssueTypes() []Type { } } -func NotAllocatableIssueTypes() []Type { - return []Type{ - TypeNoPartition, - TypeLivelinessDead, - TypeLivelinessUnknown, - TypeLivelinessNotAvailable, - TypeFailedMachineReclaim, - TypeCrashLoop, - TypeNoEventContainer, - } -} - func NewIssueFromType(t Type) (issue, error) { switch t { case TypeNoPartition: diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index b7fe1ca6..ee0b4c33 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -378,7 +378,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque machinesWithIssues, err := issues.Find(&issues.Config{ Machines: ms, EventContainers: ecs, - Only: issues.NotAllocatableIssueTypes(), + Omit: []issues.Type{issues.TypeLastEventError}, }) if err != nil { return nil, fmt.Errorf("unable to calculate machine issues: %w", err) @@ -436,24 +436,32 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque cap.Total++ - if m.Allocation != nil { - cap.Allocated++ - continue - } - if _, ok := machinesWithIssues[m.ID]; ok { cap.Faulty++ cap.FaultyMachines = append(cap.FaultyMachines, m.ID) - continue } - if m.State.Value == metal.AvailableState && metal.ProvisioningEventWaiting == pointer.FirstOrZero(ec.Events).Event { + // allocation dependent counts + switch { + 
case m.Allocation != nil: + cap.Allocated++ + case m.Waiting && !m.PreAllocated && m.State.Value == metal.AvailableState: + // the free machine count considers the same aspects as the query for electing the machine candidate! cap.Free++ - continue + default: + cap.Unavailable++ } - cap.Other++ - cap.OtherMachines = append(cap.OtherMachines, m.ID) + // provisioning state dependent counts + switch pointer.FirstOrZero(ec.Events).Event { //nolint:exhaustive + case metal.ProvisioningEventPhonedHome: + cap.PhonedHome++ + case metal.ProvisioningEventWaiting: + cap.Waiting++ + default: + cap.Other++ + cap.OtherMachines = append(cap.OtherMachines, m.ID) + } } res := []v1.PartitionCapacity{} @@ -466,10 +474,12 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque size := sizesByID[cap.Size] for _, reservation := range size.Reservations.ForPartition(pc.ID) { - reservation := reservation + usedReservations := min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) cap.Reservations += reservation.Amount - cap.UsedReservations += min(len(machinesByProject[reservation.ProjectID].WithSize(size.ID).WithPartition(pc.ID)), reservation.Amount) + cap.UsedReservations += usedReservations + cap.Free -= reservation.Amount - usedReservations + cap.Free = max(cap.Free, 0) } } diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index a54d80ce..e1d821a7 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -7,8 +7,11 @@ import ( "log/slog" "net/http" "net/http/httptest" + "slices" "testing" + "time" + "github.com/google/go-cmp/cmp" "github.com/stretchr/testify/assert" r "gopkg.in/rethinkdb/rethinkdb-go.v6" @@ -18,6 +21,7 @@ import ( v1 "github.com/metal-stack/metal-api/cmd/metal-api/internal/service/v1" 
"github.com/metal-stack/metal-api/cmd/metal-api/internal/testdata" "github.com/metal-stack/metal-lib/httperrors" + "github.com/metal-stack/metal-lib/pkg/pointer" "github.com/stretchr/testify/require" ) @@ -245,49 +249,458 @@ func TestUpdatePartition(t *testing.T) { } func TestPartitionCapacity(t *testing.T) { - ds, mock := datastore.InitMockDB(t) - - ecs := []metal.ProvisioningEventContainer{} - for _, m := range testdata.TestMachines { - m := m - ecs = append(ecs, metal.ProvisioningEventContainer{ - Base: m.Base, + var ( + mockMachines = func(mock *r.Mock, reservations []metal.Reservation, ms ...metal.Machine) { + var ( + sizes metal.Sizes + events metal.ProvisioningEventContainers + partitions metal.Partitions + ) + + for _, m := range ms { + ec := metal.ProvisioningEventContainer{Base: metal.Base{ID: m.ID}, Liveliness: metal.MachineLivelinessAlive} + if m.Waiting { + ec.Events = append(ec.Events, metal.ProvisioningEvent{ + Event: metal.ProvisioningEventWaiting, + }) + } + if m.Allocation != nil { + ec.Events = append(ec.Events, metal.ProvisioningEvent{ + Event: metal.ProvisioningEventPhonedHome, + }) + } + events = append(events, ec) + if !slices.ContainsFunc(sizes, func(s metal.Size) bool { + return s.ID == m.SizeID + }) { + s := metal.Size{Base: metal.Base{ID: m.SizeID}} + sizes = append(sizes, s) + } + if !slices.ContainsFunc(partitions, func(p metal.Partition) bool { + return p.ID == m.PartitionID + }) { + partitions = append(partitions, metal.Partition{Base: metal.Base{ID: m.PartitionID}}) + } + } + + if len(reservations) > 0 { + for i := range sizes { + sizes[i].Reservations = append(sizes[i].Reservations, reservations...) 
+ } + } + + mock.On(r.DB("mockdb").Table("machine")).Return(ms, nil) + mock.On(r.DB("mockdb").Table("event")).Return(events, nil) + mock.On(r.DB("mockdb").Table("partition")).Return(partitions, nil) + mock.On(r.DB("mockdb").Table("size")).Return(sizes, nil) + } + + machineTpl = func(id, partition, size, project string) metal.Machine { + m := metal.Machine{ + Base: metal.Base{ID: id}, + PartitionID: partition, + SizeID: size, + IPMI: metal.IPMI{ // required for healthy machine state + Address: "1.2.3." + id, + MacAddress: "aa:bb:0" + id, + LastUpdated: time.Now().Add(-1 * time.Minute), + }, + State: metal.MachineState{ + Value: metal.AvailableState, + }, + } + if project != "" { + m.Allocation = &metal.MachineAllocation{ + Project: project, + } + } + return m + } + ) + + tests := []struct { + name string + mockFn func(mock *r.Mock) + want []*v1.PartitionCapacity + }{ + { + name: "one allocated machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + mockMachines(mock, nil, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + PhonedHome: 1, + Allocated: 1, + }, + }, + }, + }, + }, + { + name: "two allocated machines", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + mockMachines(mock, nil, m1, m2) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 2, + PhonedHome: 2, + Allocated: 2, + }, + }, + }, + }, + }, + { + name: "one faulty, allocated 
machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m1.IPMI.Address = "" + mockMachines(mock, nil, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + PhonedHome: 1, + Faulty: 1, + Allocated: 1, + FaultyMachines: []string{"1"}, + }, + }, + }, + }, + }, + { + name: "one waiting machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + mockMachines(mock, nil, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 1, + }, + }, + }, + }, + }, + { + name: "one waiting, one allocated machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + mockMachines(mock, nil, m1, m2) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 2, + Allocated: 1, + Waiting: 1, + PhonedHome: 1, + Free: 1, + }, + }, + }, + }, + }, + { + name: "one free machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + m1.State.Value = metal.AvailableState + mockMachines(mock, nil, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, 
Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 1, + }, + }, + }, + }, + }, + { + name: "one machine rebooting", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = false + mockMachines(mock, nil, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Other: 1, + Unavailable: 1, + OtherMachines: []string{"1"}, + }, + }, + }, + }, + }, + { + name: "reserved machine does not count as free", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 1, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + } + + mockMachines(mock, reservations, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 0, + Reservations: 1, + UsedReservations: 0, + }, + }, + }, + }, + }, + { + name: "overbooked partition, free count capped at 0", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 1, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + { + Amount: 2, + ProjectID: "project-456", + PartitionIDs: []string{"partition-a"}, + }, + } + + mockMachines(mock, reservations, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: 
v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 0, + Reservations: 3, + UsedReservations: 0, + }, + }, + }, + }, + }, + { + name: "reservations already used up (edge)", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + m3 := machineTpl("3", "partition-a", "size-a", "") + m3.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 2, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + } + + mockMachines(mock, reservations, m1, m2, m3) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 3, + Allocated: 2, + Waiting: 1, + Free: 1, + Reservations: 2, + UsedReservations: 2, + PhonedHome: 2, + }, + }, + }, + }, + }, + { + name: "reservations already used up", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + m3 := machineTpl("3", "partition-a", "size-a", "") + m3.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 1, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + } + + mockMachines(mock, reservations, m1, m2, m3) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 3, + Allocated: 2, + Waiting: 1, + Free: 1, + Reservations: 1, + 
UsedReservations: 1, + PhonedHome: 2, + }, + }, + }, + }, + }, + { + name: "other partition size reservation has no influence", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "project-123") + m2 := machineTpl("2", "partition-a", "size-a", "project-123") + m3 := machineTpl("3", "partition-a", "size-a", "") + m3.Waiting = true + + reservations := []metal.Reservation{ + { + Amount: 2, + ProjectID: "project-123", + PartitionIDs: []string{"partition-a"}, + }, + { + Amount: 2, + ProjectID: "project-123", + PartitionIDs: []string{"partition-b"}, + }, + } + + mockMachines(mock, reservations, m1, m2, m3) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 3, + Allocated: 2, + Waiting: 1, + Free: 1, + Reservations: 2, + UsedReservations: 2, + PhonedHome: 2, + }, + }, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + var ( + ds, mock = datastore.InitMockDB(t) + body = &v1.PartitionCapacityRequest{} + ws = NewPartition(slog.Default(), ds, nil) + ) + + if tt.mockFn != nil { + tt.mockFn(mock) + } + + code, got := genericWebRequest[[]*v1.PartitionCapacity](t, ws, testViewUser, body, "POST", "/v1/partition/capacity") + assert.Equal(t, http.StatusOK, code) + + if diff := cmp.Diff(tt.want, got); diff != "" { + t.Errorf("diff (-want +got):\n%s", diff) + } }) } - mock.On(r.DB("mockdb").Table("event")).Return(ecs, nil) - - testdata.InitMockDBData(mock) - log := slog.Default() - - service := NewPartition(log, ds, &nopTopicCreator{}) - container := restful.NewContainer().Add(service) - - pcRequest := &v1.PartitionCapacityRequest{} - js, err := json.Marshal(pcRequest) - require.NoError(t, err) - body := bytes.NewBuffer(js) - - req := httptest.NewRequest("POST", "/v1/partition/capacity", body) 
- req.Header.Add("Content-Type", "application/json") - container = injectAdmin(log, container, req) - w := httptest.NewRecorder() - container.ServeHTTP(w, req) - - resp := w.Result() - defer resp.Body.Close() - require.Equal(t, http.StatusOK, resp.StatusCode, w.Body.String()) - var result []v1.PartitionCapacity - err = json.NewDecoder(resp.Body).Decode(&result) - - require.NoError(t, err) - require.Len(t, result, 1) - require.Equal(t, testdata.Partition1.ID, result[0].ID) - require.NotNil(t, result[0].ServerCapacities) - require.Len(t, result[0].ServerCapacities, 1) - c := result[0].ServerCapacities[0] - require.Equal(t, "1", c.Size) - require.Equal(t, 5, c.Total) - require.Equal(t, 0, c.Free) - require.Equal(t, 3, c.Reservations) - require.Equal(t, 1, c.UsedReservations) } diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index beed5d2f..fc9b8c3a 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -49,17 +49,42 @@ type PartitionCapacity struct { ServerCapacities ServerCapacities `json:"servers" description:"servers available in this partition"` } +// ServerCapacity holds the machine capacity of a partition of a specific size. +// The amounts of phoned home, waiting and other machines sum up to the total amount of machines.
type ServerCapacity struct { - Size string `json:"size" description:"the size of the server"` - Total int `json:"total" description:"total amount of servers with this size"` - Free int `json:"free" description:"free servers with this size"` - Allocated int `json:"allocated" description:"allocated servers with this size"` - Reservations int `json:"reservations" description:"the amount of reservations for this size"` - UsedReservations int `json:"usedreservations" description:"the amount of used reservations for this size"` - Faulty int `json:"faulty" description:"servers with issues with this size"` - FaultyMachines []string `json:"faultymachines" description:"servers with issues with this size"` - Other int `json:"other" description:"servers neither free, allocated or faulty with this size"` - OtherMachines []string `json:"othermachines" description:"servers neither free, allocated or faulty with this size"` + // Size is the size id correlating to all counts in this server capacity. + Size string `json:"size" description:"the size of the machine"` + + // Total is the total amount of machines for this size. + Total int `json:"total,omitempty" description:"total amount of machines with size"` + + // PhonedHome is the amount of machines that are currently in the provisioning state "phoned home". + PhonedHome int `json:"phoned_home,omitempty" description:"machines in phoned home provisioning state"` + // Waiting is the amount of machines that are currently in the provisioning state "waiting". + Waiting int `json:"waiting,omitempty" description:"machines in waiting provisioning state"` + // Other is the amount of machines that are neither in the provisioning state waiting nor in phoned home but in another provisioning state. + Other int `json:"other,omitempty" description:"machines neither phoned home nor waiting but in another provisioning state"` + // OtherMachines contains the machine IDs for machines that were classified into "Other". 
+ OtherMachines []string `json:"othermachines,omitempty" description:"machine ids neither allocated nor waiting with this size"` + + // Allocated is the amount of machines that are currently allocated. + Allocated int `json:"allocated,omitempty" description:"allocated machines"` + // Free is the amount of machines in a partition that can be freely allocated at any given moment by a project. + // Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable due to size reservations. + Free int `json:"free,omitempty" description:"free machines with this size (freely allocatable)"` + // Unavailable is the amount of machines in a partition that are currently not allocatable because they are not waiting or + // not in the machine state "available", e.g. locked or reserved. + Unavailable int `json:"unavailable,omitempty" description:"unavailable machines with this size"` + + // Faulty is the amount of machines that report an issue. It is counted independently of the allocation and provisioning state, so a faulty machine may also be contained in the other counts (e.g. in Allocated). + Faulty int `json:"faulty,omitempty" description:"machines with issues with this size"` + // FaultyMachines contains the machine IDs for machines that were classified into "Faulty". + FaultyMachines []string `json:"faultymachines,omitempty" description:"machine ids with issues with this size"` + + // Reservations is the amount of reservations made for this size. + Reservations int `json:"reservations,omitempty" description:"the amount of reservations for this size"` + // UsedReservations is the amount of reservations already used up for this size.
+ UsedReservations int `json:"usedreservations,omitempty" description:"the amount of used reservations for this size"` } func NewPartitionResponse(p *metal.Partition) *PartitionResponse { diff --git a/spec/metal-api.json b/spec/metal-api.json index 4c1da80c..b67cf276 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -4400,50 +4400,60 @@ "v1.ServerCapacity": { "properties": { "allocated": { - "description": "allocated servers with this size", + "description": "allocated machines", "format": "int32", "type": "integer" }, "faulty": { - "description": "servers with issues with this size", + "description": "machines with issues with this size", "format": "int32", "type": "integer" }, "faultymachines": { - "description": "servers with issues with this size", + "description": "machine ids with issues with this size", "items": { "type": "string" }, "type": "array" }, "free": { - "description": "free servers with this size", + "description": "free machines with this size (freely allocatable)", "format": "int32", "type": "integer" }, "other": { - "description": "servers neither free, allocated or faulty with this size", + "description": "machines neither phoned home nor waiting but in another provisioning state", "format": "int32", "type": "integer" }, "othermachines": { - "description": "servers neither free, allocated or faulty with this size", + "description": "machine ids neither allocated nor waiting with this size", "items": { "type": "string" }, "type": "array" }, + "phoned_home": { + "description": "machines in phoned home provisioning state", + "format": "int32", + "type": "integer" + }, "reservations": { "description": "the amount of reservations for this size", "format": "int32", "type": "integer" }, "size": { - "description": "the size of the server", + "description": "the size of the machine", "type": "string" }, "total": { - "description": "total amount of servers with this size", + "description": "total amount of machines with size", + "format": 
"int32", + "type": "integer" + }, + "unavailable": { + "description": "unavailable machines with this size", "format": "int32", "type": "integer" }, @@ -4451,19 +4461,15 @@ "description": "the amount of used reservations for this size", "format": "int32", "type": "integer" + }, + "waiting": { + "description": "machines in waiting provisioning state", + "format": "int32", + "type": "integer" } }, "required": [ - "allocated", - "faulty", - "faultymachines", - "free", - "other", - "othermachines", - "reservations", - "size", - "total", - "usedreservations" + "size" ] }, "v1.SizeConstraint": { From 70670a11ebb985316a9739b99c6b00fa77b60729 Mon Sep 17 00:00:00 2001 From: Gerrit Date: Fri, 13 Sep 2024 15:55:25 +0200 Subject: [PATCH 3/4] Exclude dead machines from free count. (#572) --- .../internal/service/partition-service.go | 2 +- .../service/partition-service_test.go | 54 ++++++++++++++----- 2 files changed, 41 insertions(+), 15 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index ee0b4c33..f783ac4a 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -445,7 +445,7 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque switch { case m.Allocation != nil: cap.Allocated++ - case m.Waiting && !m.PreAllocated && m.State.Value == metal.AvailableState: + case m.Waiting && !m.PreAllocated && m.State.Value == metal.AvailableState && ec.Liveliness == metal.MachineLivelinessAlive: // the free machine count considers the same aspects as the query for electing the machine candidate! 
cap.Free++ default: diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index e1d821a7..fd6b15a8 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -250,7 +250,7 @@ func TestUpdatePartition(t *testing.T) { func TestPartitionCapacity(t *testing.T) { var ( - mockMachines = func(mock *r.Mock, reservations []metal.Reservation, ms ...metal.Machine) { + mockMachines = func(mock *r.Mock, liveliness metal.MachineLiveliness, reservations []metal.Reservation, ms ...metal.Machine) { var ( sizes metal.Sizes events metal.ProvisioningEventContainers @@ -258,7 +258,7 @@ func TestPartitionCapacity(t *testing.T) { ) for _, m := range ms { - ec := metal.ProvisioningEventContainer{Base: metal.Base{ID: m.ID}, Liveliness: metal.MachineLivelinessAlive} + ec := metal.ProvisioningEventContainer{Base: metal.Base{ID: m.ID}, Liveliness: liveliness} if m.Waiting { ec.Events = append(ec.Events, metal.ProvisioningEvent{ Event: metal.ProvisioningEventWaiting, @@ -327,7 +327,7 @@ func TestPartitionCapacity(t *testing.T) { name: "one allocated machine", mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "project-123") - mockMachines(mock, nil, m1) + mockMachines(mock, metal.MachineLivelinessAlive, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -350,7 +350,7 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "project-123") m2 := machineTpl("2", "partition-a", "size-a", "project-123") - mockMachines(mock, nil, m1, m2) + mockMachines(mock, metal.MachineLivelinessAlive, nil, m1, m2) }, want: []*v1.PartitionCapacity{ { @@ -373,7 +373,7 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "project-123") m1.IPMI.Address = "" - mockMachines(mock, nil, m1) + mockMachines(mock, 
metal.MachineLivelinessAlive, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -398,7 +398,7 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true - mockMachines(mock, nil, m1) + mockMachines(mock, metal.MachineLivelinessAlive, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -416,13 +416,39 @@ func TestPartitionCapacity(t *testing.T) { }, }, }, + { + name: "one dead machine", + mockFn: func(mock *r.Mock) { + m1 := machineTpl("1", "partition-a", "size-a", "") + m1.Waiting = true + + mockMachines(mock, metal.MachineLivelinessDead, nil, m1) + }, + want: []*v1.PartitionCapacity{ + { + Common: v1.Common{ + Identifiable: v1.Identifiable{ID: "partition-a"}, Describable: v1.Describable{Name: pointer.Pointer(""), Description: pointer.Pointer("")}, + }, + ServerCapacities: v1.ServerCapacities{ + { + Size: "size-a", + Total: 1, + Waiting: 1, + Faulty: 1, + Unavailable: 1, + FaultyMachines: []string{"1"}, + }, + }, + }, + }, + }, { name: "one waiting, one allocated machine", mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true m2 := machineTpl("2", "partition-a", "size-a", "project-123") - mockMachines(mock, nil, m1, m2) + mockMachines(mock, metal.MachineLivelinessAlive, nil, m1, m2) }, want: []*v1.PartitionCapacity{ { @@ -448,7 +474,7 @@ func TestPartitionCapacity(t *testing.T) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = true m1.State.Value = metal.AvailableState - mockMachines(mock, nil, m1) + mockMachines(mock, metal.MachineLivelinessAlive, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -471,7 +497,7 @@ func TestPartitionCapacity(t *testing.T) { mockFn: func(mock *r.Mock) { m1 := machineTpl("1", "partition-a", "size-a", "") m1.Waiting = false - mockMachines(mock, nil, m1) + mockMachines(mock, metal.MachineLivelinessAlive, nil, m1) }, want: []*v1.PartitionCapacity{ { @@ -504,7 +530,7 @@ func TestPartitionCapacity(t 
*testing.T) { }, } - mockMachines(mock, reservations, m1) + mockMachines(mock, metal.MachineLivelinessAlive, reservations, m1) }, want: []*v1.PartitionCapacity{ { @@ -543,7 +569,7 @@ func TestPartitionCapacity(t *testing.T) { }, } - mockMachines(mock, reservations, m1) + mockMachines(mock, metal.MachineLivelinessAlive, reservations, m1) }, want: []*v1.PartitionCapacity{ { @@ -579,7 +605,7 @@ func TestPartitionCapacity(t *testing.T) { }, } - mockMachines(mock, reservations, m1, m2, m3) + mockMachines(mock, metal.MachineLivelinessAlive, reservations, m1, m2, m3) }, want: []*v1.PartitionCapacity{ { @@ -617,7 +643,7 @@ func TestPartitionCapacity(t *testing.T) { }, } - mockMachines(mock, reservations, m1, m2, m3) + mockMachines(mock, metal.MachineLivelinessAlive, reservations, m1, m2, m3) }, want: []*v1.PartitionCapacity{ { @@ -660,7 +686,7 @@ func TestPartitionCapacity(t *testing.T) { }, } - mockMachines(mock, reservations, m1, m2, m3) + mockMachines(mock, metal.MachineLivelinessAlive, reservations, m1, m2, m3) }, want: []*v1.PartitionCapacity{ { From 11c9d461a2adb0a2b2527588152a1c99eb3e97ef Mon Sep 17 00:00:00 2001 From: Gerrit Date: Mon, 16 Sep 2024 12:22:04 +0200 Subject: [PATCH 4/4] Add `Allocatable` and `RemainingReservations` to partition capacity. 
(#573) --- .../internal/service/partition-service.go | 9 ++- .../service/partition-service_test.go | 62 +++++++++++-------- .../internal/service/v1/partition.go | 5 ++ spec/metal-api.json | 10 +++ 4 files changed, 57 insertions(+), 29 deletions(-) diff --git a/cmd/metal-api/internal/service/partition-service.go b/cmd/metal-api/internal/service/partition-service.go index f783ac4a..b0573768 100644 --- a/cmd/metal-api/internal/service/partition-service.go +++ b/cmd/metal-api/internal/service/partition-service.go @@ -446,7 +446,8 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque case m.Allocation != nil: cap.Allocated++ case m.Waiting && !m.PreAllocated && m.State.Value == metal.AvailableState && ec.Liveliness == metal.MachineLivelinessAlive: - // the free machine count considers the same aspects as the query for electing the machine candidate! + // the free and allocatable machine counts consider the same aspects as the query for electing the machine candidate! 
+ cap.Allocatable++ cap.Free++ default: cap.Unavailable++ @@ -469,8 +470,6 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque pc := pc for _, cap := range pc.ServerCapacities { - cap := cap - size := sizesByID[cap.Size] for _, reservation := range size.Reservations.ForPartition(pc.ID) { @@ -483,6 +482,10 @@ func (r *partitionResource) calcPartitionCapacity(pcr *v1.PartitionCapacityReque } } + for _, cap := range pc.ServerCapacities { + cap.RemainingReservations = cap.Reservations - cap.UsedReservations + } + res = append(res, *pc) } diff --git a/cmd/metal-api/internal/service/partition-service_test.go b/cmd/metal-api/internal/service/partition-service_test.go index fd6b15a8..943492d9 100644 --- a/cmd/metal-api/internal/service/partition-service_test.go +++ b/cmd/metal-api/internal/service/partition-service_test.go @@ -407,10 +407,11 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 1, - Waiting: 1, - Free: 1, + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 1, + Allocatable: 1, }, }, }, @@ -457,12 +458,13 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 2, - Allocated: 1, - Waiting: 1, - PhonedHome: 1, - Free: 1, + Size: "size-a", + Total: 2, + Allocated: 1, + Waiting: 1, + PhonedHome: 1, + Free: 1, + Allocatable: 1, }, }, }, @@ -483,10 +485,11 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 1, - Waiting: 1, - Free: 1, + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 1, + Allocatable: 1, }, }, }, @@ -539,12 +542,14 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 1, - Waiting: 1, - Free: 0, - Reservations: 1, - UsedReservations: 0, + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 0, + Allocatable: 1, + Reservations: 1, + UsedReservations: 0, + 
RemainingReservations: 1, }, }, }, @@ -578,12 +583,14 @@ func TestPartitionCapacity(t *testing.T) { }, ServerCapacities: v1.ServerCapacities{ { - Size: "size-a", - Total: 1, - Waiting: 1, - Free: 0, - Reservations: 3, - UsedReservations: 0, + Size: "size-a", + Total: 1, + Waiting: 1, + Free: 0, + Allocatable: 1, + Reservations: 3, + UsedReservations: 0, + RemainingReservations: 3, }, }, }, @@ -619,6 +626,7 @@ func TestPartitionCapacity(t *testing.T) { Allocated: 2, Waiting: 1, Free: 1, + Allocatable: 1, Reservations: 2, UsedReservations: 2, PhonedHome: 2, @@ -657,6 +665,7 @@ func TestPartitionCapacity(t *testing.T) { Allocated: 2, Waiting: 1, Free: 1, + Allocatable: 1, Reservations: 1, UsedReservations: 1, PhonedHome: 2, @@ -700,6 +709,7 @@ func TestPartitionCapacity(t *testing.T) { Allocated: 2, Waiting: 1, Free: 1, + Allocatable: 1, Reservations: 2, UsedReservations: 2, PhonedHome: 2, diff --git a/cmd/metal-api/internal/service/v1/partition.go b/cmd/metal-api/internal/service/v1/partition.go index fc9b8c3a..f3304241 100644 --- a/cmd/metal-api/internal/service/v1/partition.go +++ b/cmd/metal-api/internal/service/v1/partition.go @@ -69,6 +69,9 @@ type ServerCapacity struct { // Allocated is the amount of machines that are currently allocated. Allocated int `json:"allocated,omitempty" description:"allocated machines"` + // Allocatable is the amount of machines in a partition that can be allocated. + // Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable. Size reservations are not considered in this count. + Allocatable int `json:"allocatable,omitempty" description:"free machines with this size, size reservations are not considered"` // Free is the amount of machines in a partition that can be freely allocated at any given moment by a project. 
// Effectively this is the amount of waiting machines minus the machines that are unavailable due to machine state or un-allocatable due to size reservations. Free int `json:"free,omitempty" description:"free machines with this size (freely allocatable)"` @@ -85,6 +88,8 @@ type ServerCapacity struct { Reservations int `json:"reservations,omitempty" description:"the amount of reservations for this size"` // UsedReservations is the amount of reservations already used up for this size. UsedReservations int `json:"usedreservations,omitempty" description:"the amount of used reservations for this size"` + // RemainingReservations is the amount of reservations remaining for this size. + RemainingReservations int `json:"remainingreservations,omitempty" description:"the amount of unused / remaining / open reservations for this size"` } func NewPartitionResponse(p *metal.Partition) *PartitionResponse { diff --git a/spec/metal-api.json b/spec/metal-api.json index b67cf276..a8625172 100644 --- a/spec/metal-api.json +++ b/spec/metal-api.json @@ -4399,6 +4399,11 @@ }, "v1.ServerCapacity": { "properties": { + "allocatable": { + "description": "free machines with this size, size reservations are not considered", + "format": "int32", + "type": "integer" + }, "allocated": { "description": "allocated machines", "format": "int32", @@ -4438,6 +4443,11 @@ "format": "int32", "type": "integer" }, + "remainingreservations": { + "description": "the amount of unused / remaining / open reservations for this size", + "format": "int32", + "type": "integer" + }, "reservations": { "description": "the amount of reservations for this size", "format": "int32",