From 9957a1db91177b3713d3203f4856e39b4c8d787a Mon Sep 17 00:00:00 2001 From: Ashraf Fouda Date: Tue, 7 Oct 2025 17:09:02 +0300 Subject: [PATCH] skipping unreachable containers when deploying zdbs Signed-off-by: Ashraf Fouda --- pkg/primitives/zdb/zdb.go | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/pkg/primitives/zdb/zdb.go b/pkg/primitives/zdb/zdb.go index 19ff4cf5..0f312ee8 100644 --- a/pkg/primitives/zdb/zdb.go +++ b/pkg/primitives/zdb/zdb.go @@ -168,7 +168,7 @@ func (p *Manager) zdbProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWi if err != nil { return zos.ZDBResult{}, errors.Wrap(err, "failed to find IP address on zdb0 interface") } - + log.Info().Msg("found a container matching this namespace of the requested zdb") return zos.ZDBResult{ Namespace: nsID, IPs: ipsToString(containerIPs), @@ -189,8 +189,24 @@ func (p *Manager) zdbProvisionImpl(ctx context.Context, wl *gridtypes.WorkloadWi var cont tZDBContainer if len(candidates) > 0 { - cont = candidates[0] - } else { + // try to find the first reachable zdb instance + for _, c := range candidates { + cl := zdbConnection(pkg.ContainerID(c.Name)) + if err := cl.Connect(); err == nil { + _ = cl.Close() + log.Debug().Str("container name", c.Name).Msg( + "found running container suitable for provisioning the namespace of the requested zdb", + ) + cont = c + break + } + log.Debug().Str("container name", c.Name).Msg("zdb container is not reachable") + _ = cl.Close() + } + } + + // if no reachable candidate was found, allocate a new device and start a container + if cont.Name == "" { // allocate new disk device, err := storage.DeviceAllocate(ctx, config.Size) if err != nil { @@ -869,6 +885,7 @@ func socketFile(containerID pkg.ContainerID) string { // mock it in testing. 
// zdbConnection returns a zdb client for the given container ID. It formats
// the address as "unix://<id>@<socketFile(id)>", logs that address at debug
// level, and hands it to zdb.New. It is declared as a func variable rather
// than a plain function; the comment preceding it says this is so tests can
// mock it.
// NOTE(review): this line is a unified-diff hunk flattened onto one line. The
// "+" before log.Debug() looks like the patch's added-line marker, not a
// string concatenation — confirm against pkg/primitives/zdb/zdb.go.
// NOTE(review): the log message says "connecting", but only zdb.New is called
// here; the provisioning hunk calls Connect() on the returned client, so the
// connection is presumably opened by the caller — verify.
var zdbConnection = func(id pkg.ContainerID) zdb.Client { socket := fmt.Sprintf("unix://%s@%s", string(id), socketFile(id)) + log.Debug().Str("Socket", socket).Msg("connecting to zdb container on socket") return zdb.New(socket) }