diff --git a/server/internal/ipam/service.go b/server/internal/ipam/service.go index 7cb2642d..83135011 100644 --- a/server/internal/ipam/service.go +++ b/server/internal/ipam/service.go @@ -37,6 +37,117 @@ func (s *Service) AllocateSubnet(ctx context.Context, prefix netip.Prefix, bits return s.allocateSubnet(ctx, prefix, bits, maxRetries) } +// ReleaseSubnet releases the subnet back to the pool. Best-effort: logs warnings and returns nil so callers are not failed. +func (s *Service) ReleaseSubnet(ctx context.Context, prefix netip.Prefix, bits int, subnet netip.Prefix) error { + s.mu.Lock() + defer s.mu.Unlock() + + if !subnet.IsValid() || !prefix.IsValid() { + s.logger.Warn(). + Str("subnet", subnet.String()). + Str("prefix", prefix.String()). + Msg("release subnet skipped: invalid subnet or prefix") + return nil + } + + var lastErr error + for retries := maxRetries; retries > 0; retries-- { + lastErr = s.releaseSubnet(ctx, prefix, bits, subnet) + if lastErr == nil { + return nil + } + if !errors.Is(lastErr, storage.ErrValueVersionMismatch) { + // Non-retryable: log and succeed so delete is not blocked + s.logger.Warn(). + Err(lastErr). + Str("subnet", subnet.String()). + Str("prefix", prefix.String()). + Msg("release subnet failed (non-fatal)") + return nil + } + } + s.logger.Warn(). + Err(lastErr). + Str("subnet", subnet.String()). + Str("prefix", prefix.String()). + Msg("release subnet failed after retries (non-fatal)") + return nil +} + +// releaseSubnet performs one release attempt. Returns ErrValueVersionMismatch on conflict for retry. +func (s *Service) releaseSubnet(ctx context.Context, prefix netip.Prefix, bits int, subnet netip.Prefix) error { + stored, err := s.store.GetByKey(prefix.String()).Exec(ctx) + if errors.Is(err, storage.ErrNotFound) { + s.logger.Warn(). + Str("prefix", prefix.String()). + Str("subnet", subnet.String()). + Msg("release subnet skipped: no allocator for range") + return nil + } + if err != nil { + return fmt.Errorf("failed to get subnet allocator from storage: %w", err) + } + + allocator, err := NewSubnetRange(SubnetRangeSpec{ + CIDR: prefix, + SubnetBits: bits, + }) + if err != nil { + return fmt.Errorf("failed to create subnet allocator: %w", err) + } + + if err := allocator.Restore(stored.Spec, stored.Snapshot); err != nil { + s.logger.Warn(). + Err(err). + Str("prefix", prefix.String()). + Str("subnet", subnet.String()). + Msg("release subnet skipped: failed to restore allocator (e.g. config changed)") + return nil + } + + if !allocator.Contains(subnet) { + s.logger.Warn(). + Str("subnet", subnet.String()). + Str("prefix", prefix.String()). + Msg("release subnet skipped: subnet outside or mismatched with configured range") + return nil + } + + if err := allocator.Release(subnet); err != nil { + s.logger.Warn(). + Err(err). + Str("subnet", subnet.String()). + Str("prefix", prefix.String()). + Msg("release subnet skipped: release failed (e.g. subnet not allocated)") + return nil + } + + spec, snapshot, err := allocator.Snapshot() + if err != nil { + s.logger.Warn(). + Err(err). + Str("subnet", subnet.String()). + Msg("release subnet skipped: failed to snapshot allocator") + return nil + } + + stored.Spec = spec + stored.Snapshot = snapshot + + if err := s.store.Update(stored).Exec(ctx); err != nil { + if errors.Is(err, storage.ErrValueVersionMismatch) { + return err + } + s.logger.Warn(). + Err(err). + Str("subnet", subnet.String()). + Msg("release subnet skipped: failed to store allocator") + return nil + } + + return nil +} + func (s *Service) allocateSubnet(ctx context.Context, prefix netip.Prefix, bits int, retriesRemaining int) (netip.Prefix, error) { if retriesRemaining < 1 { // This can happen if there's too much contention for this subnet range diff --git a/server/internal/ipam/subnet.go b/server/internal/ipam/subnet.go index bada9d6b..304f282e 100644 --- a/server/internal/ipam/subnet.go +++ b/server/internal/ipam/subnet.go @@ -225,6 +225,12 @@ func (r *SubnetRange) Has(subnet netip.Prefix) bool { return r.alloc.Has(offset) } +// Contains returns true if the subnet is within this range. +func (r *SubnetRange) Contains(subnet netip.Prefix) bool { + ok, _ := r.contains(subnet) + return ok +} + // Snapshot saves the current state of the pool. func (r *SubnetRange) Snapshot() (string, []byte, error) { snapshottable, ok := r.alloc.(allocator.Snapshottable) diff --git a/server/internal/orchestrator/swarm/network.go b/server/internal/orchestrator/swarm/network.go index d18ad19f..4f5685a1 100644 --- a/server/internal/orchestrator/swarm/network.go +++ b/server/internal/orchestrator/swarm/network.go @@ -161,14 +161,15 @@ func (n *Network) Delete(ctx context.Context, rc *resource.Context) error { return err } - // TODO: need to add a deallocate method to the ipam service - err = client.NetworkRemove(ctx, n.Name) - if errors.Is(err, docker.ErrNotFound) { - return nil - } else if err != nil { + if err != nil && !errors.Is(err, docker.ErrNotFound) { return fmt.Errorf("failed to remove network %q: %w", n.Name, err) } + if n.Subnet.IsValid() && n.Allocator.Prefix.IsValid() { + if ipamSvc, err := do.Invoke[*ipam.Service](rc.Injector); err == nil { + _ = ipamSvc.ReleaseSubnet(ctx, n.Allocator.Prefix, n.Allocator.Bits, n.Subnet) + } + } return nil }