diff --git a/Sources/Container-Compose/ActivityClock.swift b/Sources/Container-Compose/ActivityClock.swift new file mode 100644 index 00000000..20bc3088 --- /dev/null +++ b/Sources/Container-Compose/ActivityClock.swift @@ -0,0 +1,46 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2025 Morris Richman and the Container-Compose project authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +import Foundation + +/// Thread-safe timestamp of the most recent output from a service's +/// `container run` subprocess. Written from the streaming `Task` and read by +/// the readiness wait to tell "slow but progressing" apart from "stuck". +/// +/// The clock is injectable so the idle-window logic can be exercised in tests +/// without leaning on real wall-clock time. 
+final class ActivityClock: @unchecked Sendable { + private let lock = NSLock() + private let now: @Sendable () -> Date + private var _lastActivity: Date + + init(now: @escaping @Sendable () -> Date = { Date() }) { + self.now = now + self._lastActivity = now() + } + + func touch() { + lock.lock() + _lastActivity = now() + lock.unlock() + } + + var lastActivity: Date { + lock.lock() + defer { lock.unlock() } + return _lastActivity + } +} diff --git a/Sources/Container-Compose/Commands/ComposeUp.swift b/Sources/Container-Compose/Commands/ComposeUp.swift index 97a16f96..cb10885b 100644 --- a/Sources/Container-Compose/Commands/ComposeUp.swift +++ b/Sources/Container-Compose/Commands/ComposeUp.swift @@ -261,32 +261,64 @@ public struct ComposeUp: AsyncParsableCommand, @unchecked Sendable { return ip } - /// Repeatedly checks `container list -a` until the given container is listed as `running`. + /// Repeatedly polls until the named container reports `running`. + /// + /// The container is launched by a `container run` subprocess that may first + /// download images — notably the one-time ~64 MB init image — and that pull + /// happens *inside* this wait window. A fixed wall-clock timeout therefore + /// aborted mid-download on slow connections (`up -d` failing with + /// "Timed out waiting for container ... to be running"). + /// + /// Instead we use an *idle* timeout: the run subprocess streams pull/startup + /// progress into `activity`, so we only give up after `idleTimeout` seconds + /// with no output *and* the container still not running — i.e. genuinely + /// stuck, not merely slow. Mirrors `docker compose up`, which shows pull + /// progress and doesn't bail during an active download. + /// + /// On top of the idle timeout we keep an absolute `maxWait` backstop: a + /// container that keeps dribbling output every few seconds without ever + /// reaching `running` would otherwise refresh `activity` forever and hang + /// `up -d` indefinitely. 
The backstop bounds that pathological case while + /// still leaving plenty of room for a genuinely slow (but progressing) pull. /// - Parameters: - /// - containerName: The exact name of the container (e.g. "Assignment-Manager-API-db"). - /// - timeout: Max seconds to wait before failing. + /// - serviceName: Compose service name; the container is `<projectName>-<serviceName>`. + /// - activity: Tracks the last time the run subprocess produced output. + /// - idleTimeout: Max seconds of no output (while not running) before failing. + /// - maxWait: Absolute ceiling on the wait, regardless of ongoing output. /// - interval: How often to poll (in seconds). - /// - Returns: `true` if the container reached "running" state within the timeout. - private func waitUntilServiceIsRunning(_ serviceName: String, timeout: TimeInterval = 30, interval: TimeInterval = 0.5) async throws { + private func waitUntilServiceIsRunning(_ serviceName: String, activity: ActivityClock, idleTimeout: TimeInterval = 30, maxWait: TimeInterval = 300, interval: TimeInterval = 0.5) async throws { guard let projectName else { return } let containerName = "\(projectName)-\(serviceName)" - - let deadline = Date().addingTimeInterval(timeout) let client = ContainerClient() + let start = Date() - while Date() < deadline { + while true { try await Task.sleep(nanoseconds: UInt64(interval * 1_000_000_000)) let container = try? await client.get(id: containerName) if container?.status == .running { return } + let now = Date() + // An active pull keeps refreshing `activity`, pushing the idle + // deadline out, so slow downloads never trip this — only genuine + // silence does. + if now.timeIntervalSince(activity.lastActivity) > idleTimeout { + throw NSError( + domain: "ContainerWait", code: 1, + userInfo: [ + NSLocalizedDescriptionKey: "Timed out waiting for container '\(containerName)' to be running." + ]) + } + // Absolute backstop: even with continuous output, never wait past + // `maxWait` for the container to come up. 
+ if now.timeIntervalSince(start) > maxWait { + throw NSError( + domain: "ContainerWait", code: 1, + userInfo: [ + NSLocalizedDescriptionKey: "Timed out waiting for container '\(containerName)' to be running (exceeded \(Int(maxWait))s)." + ]) + } } - - throw NSError( - domain: "ContainerWait", code: 1, - userInfo: [ - NSLocalizedDescriptionKey: "Timed out waiting for container '\(containerName)' to be running." - ]) } private func stopOldStuff(_ services: [String], remove: Bool) async throws { @@ -625,9 +657,14 @@ public struct ComposeUp: AsyncParsableCommand, @unchecked Sendable { self.containerConsoleColors[serviceName] = serviceColor - Task { [self, serviceColor] in + // Tracks output from the run subprocess so the readiness wait below can + // tell an in-progress image pull from a stuck container. + let activity = ActivityClock() + + let runTask = Task { [self, serviceColor, activity] in @Sendable func handleOutput(_ output: String) { + activity.touch() print("\(serviceName): \(output)".applyingColor(serviceColor)) } @@ -638,9 +675,15 @@ public struct ComposeUp: AsyncParsableCommand, @unchecked Sendable { } do { - try await waitUntilServiceIsRunning(serviceName) + try await waitUntilServiceIsRunning(serviceName, activity: activity) try await updateEnvironmentWithServiceIP(serviceName) } catch { + // The wait gave up (idle/backstop timeout) but the `container run` + // subprocess is still streaming in the background. Tear it down so + // it doesn't leak past the failed wait: cancel the streaming task + // and stop the container, which also lets the subprocess exit. + runTask.cancel() + try? 
await stopOldStuff([serviceName], remove: false) print(error) } } diff --git a/Tests/Container-Compose-StaticTests/ActivityClockTests.swift b/Tests/Container-Compose-StaticTests/ActivityClockTests.swift new file mode 100644 index 00000000..d8143e16 --- /dev/null +++ b/Tests/Container-Compose-StaticTests/ActivityClockTests.swift @@ -0,0 +1,109 @@ +//===----------------------------------------------------------------------===// +// Copyright © 2025 Morris Richman and the Container-Compose project authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +//===----------------------------------------------------------------------===// + +import Testing +import Foundation +@testable import ContainerComposeCore + +@Suite("ActivityClock") +struct ActivityClockTests { + + /// A clock backed by a mutable, thread-safe "current time" so the idle + /// window can be exercised deterministically without `Task.sleep`. 
+ private final class FakeClock: @unchecked Sendable { + private let lock = NSLock() + private var _now: Date + init(_ start: Date) { _now = start } + var now: Date { + lock.lock(); defer { lock.unlock() } + return _now + } + func advance(by interval: TimeInterval) { + lock.lock(); _now += interval; lock.unlock() + } + } + + @Test("seeds lastActivity from the injected clock") + func seedsFromInjectedClock() { + let start = Date(timeIntervalSince1970: 1_000) + let fake = FakeClock(start) + let clock = ActivityClock(now: { fake.now }) + + #expect(clock.lastActivity == start) + } + + @Test("touch() captures the injected clock's current time") + func touchCapturesInjectedTime() { + let start = Date(timeIntervalSince1970: 1_000) + let fake = FakeClock(start) + let clock = ActivityClock(now: { fake.now }) + + fake.advance(by: 5) + clock.touch() + + #expect(clock.lastActivity == start.addingTimeInterval(5)) + } + + /// The wait logic compares `now - lastActivity` against the idle timeout. + /// With an injected clock we can verify that window without real time: a + /// `touch()` resets the elapsed-since-activity interval to zero, and time + /// advancing without a touch grows it past the threshold. + @Test("idle window reflects time since the last touch") + func idleWindowReflectsTimeSinceTouch() { + let start = Date(timeIntervalSince1970: 1_000) + let fake = FakeClock(start) + let clock = ActivityClock(now: { fake.now }) + + // Active progress: a touch right before we measure keeps the window small. + fake.advance(by: 100) + clock.touch() + #expect(fake.now.timeIntervalSince(clock.lastActivity) == 0) + + // Silence: time moves on without a touch, so the window grows. + fake.advance(by: 31) + #expect(fake.now.timeIntervalSince(clock.lastActivity) == 31) + } + + /// `touch()` and `lastActivity` race across threads in production (the + /// streaming task writes while the wait loop reads). 
Hammer both + /// concurrently and assert we observe a coherent, monotonic value and no + /// crash from a data race. + @Test("touch() and lastActivity are safe under concurrent access") + func concurrentAccessIsSafe() async { + let clock = ActivityClock() + let iterations = 10_000 + + await withTaskGroup(of: Void.self) { group in + // Writers. + for _ in 0..<4 { + group.addTask { + for _ in 0.. 0) + } +}