Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
46 changes: 46 additions & 0 deletions Sources/Container-Compose/ActivityClock.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
//===----------------------------------------------------------------------===//
// Copyright © 2025 Morris Richman and the Container-Compose project authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//===----------------------------------------------------------------------===//

import Foundation

/// Records when a service's `container run` subprocess last produced output.
///
/// The streaming `Task` calls `touch()` for each piece of output, and the
/// readiness wait reads `lastActivity` to separate a start that is slow but
/// still progressing from one that has gone silent. Every access to the
/// stored timestamp is serialized by an `NSLock`, which is what justifies the
/// `@unchecked Sendable` conformance.
///
/// The time source is injected so that tests can steer the idle window
/// deterministically rather than depending on the wall clock.
final class ActivityClock: @unchecked Sendable {
    /// Supplies the current time; defaults to the wall clock.
    private let now: @Sendable () -> Date
    /// Guards every read and write of `stamp`.
    private let lock = NSLock()
    /// Time of the latest `touch()`, or of construction if never touched.
    private var stamp: Date

    /// Creates a clock whose last activity is seeded from the time source.
    /// - Parameter now: Time source sampled once here and on every `touch()`.
    init(now: @escaping @Sendable () -> Date = { Date() }) {
        let seed = now()
        self.now = now
        self.stamp = seed
    }

    /// Marks activity as having happened at the time source's current value.
    func touch() {
        lock.lock()
        defer { lock.unlock() }
        // Sampled while holding the lock so concurrent touches are stored in
        // the same order in which they read the time source.
        stamp = now()
    }

    /// The most recently recorded activity time.
    var lastActivity: Date {
        lock.lock()
        let snapshot = stamp
        lock.unlock()
        return snapshot
    }
}
75 changes: 59 additions & 16 deletions Sources/Container-Compose/Commands/ComposeUp.swift
Original file line number Diff line number Diff line change
Expand Up @@ -261,32 +261,64 @@ public struct ComposeUp: AsyncParsableCommand, @unchecked Sendable {
return ip
}

/// Repeatedly checks `container list -a` until the given container is listed as `running`.
/// Repeatedly polls until the named container reports `running`.
///
/// The container is launched by a `container run` subprocess that may first
/// download images — notably the one-time ~64 MB init image — and that pull
/// happens *inside* this wait window. A fixed wall-clock timeout therefore
/// aborted mid-download on slow connections (`up -d` failing with
/// "Timed out waiting for container ... to be running").
///
/// Instead we use an *idle* timeout: the run subprocess streams pull/startup
/// progress into `activity`, so we only give up after `idleTimeout` seconds
/// with no output *and* the container still not running — i.e. genuinely
/// stuck, not merely slow. Mirrors `docker compose up`, which shows pull
/// progress and doesn't bail during an active download.
///
/// On top of the idle timeout we keep an absolute `maxWait` backstop: a
/// container that keeps dribbling output every few seconds without ever
/// reaching `running` would otherwise refresh `activity` forever and hang
/// `up -d` indefinitely. The backstop bounds that pathological case while
/// still leaving plenty of room for a genuinely slow (but progressing) pull.
/// - Parameters:
/// - containerName: The exact name of the container (e.g. "Assignment-Manager-API-db").
/// - timeout: Max seconds to wait before failing.
/// - serviceName: Compose service name; the container is `<project>-<service>`.
/// - activity: Tracks the last time the run subprocess produced output.
/// - idleTimeout: Max seconds of no output (while not running) before failing.
/// - maxWait: Absolute ceiling on the wait, regardless of ongoing output.
/// - interval: How often to poll (in seconds).
/// - Returns: `true` if the container reached "running" state within the timeout.
private func waitUntilServiceIsRunning(_ serviceName: String, timeout: TimeInterval = 30, interval: TimeInterval = 0.5) async throws {
private func waitUntilServiceIsRunning(_ serviceName: String, activity: ActivityClock, idleTimeout: TimeInterval = 30, maxWait: TimeInterval = 300, interval: TimeInterval = 0.5) async throws {
guard let projectName else { return }
let containerName = "\(projectName)-\(serviceName)"

let deadline = Date().addingTimeInterval(timeout)
let client = ContainerClient()
let start = Date()

while Date() < deadline {
while true {
try await Task.sleep(nanoseconds: UInt64(interval * 1_000_000_000))
let container = try? await client.get(id: containerName)
if container?.status == .running {
return
}
let now = Date()
// An active pull keeps refreshing `activity`, pushing the idle
// deadline out, so slow downloads never trip this — only genuine
// silence does.
if now.timeIntervalSince(activity.lastActivity) > idleTimeout {
throw NSError(
domain: "ContainerWait", code: 1,
userInfo: [
NSLocalizedDescriptionKey: "Timed out waiting for container '\(containerName)' to be running."
])
}
// Absolute backstop: even with continuous output, never wait past
// `maxWait` for the container to come up.
if now.timeIntervalSince(start) > maxWait {
throw NSError(
domain: "ContainerWait", code: 1,
userInfo: [
NSLocalizedDescriptionKey: "Timed out waiting for container '\(containerName)' to be running (exceeded \(Int(maxWait))s)."
])
}
}

throw NSError(
domain: "ContainerWait", code: 1,
userInfo: [
NSLocalizedDescriptionKey: "Timed out waiting for container '\(containerName)' to be running."
])
}

private func stopOldStuff(_ services: [String], remove: Bool) async throws {
Expand Down Expand Up @@ -625,9 +657,14 @@ public struct ComposeUp: AsyncParsableCommand, @unchecked Sendable {

self.containerConsoleColors[serviceName] = serviceColor

Task { [self, serviceColor] in
// Tracks output from the run subprocess so the readiness wait below can
// tell an in-progress image pull from a stuck container.
let activity = ActivityClock()

let runTask = Task { [self, serviceColor, activity] in
@Sendable
func handleOutput(_ output: String) {
activity.touch()
print("\(serviceName): \(output)".applyingColor(serviceColor))
}

Expand All @@ -638,9 +675,15 @@ public struct ComposeUp: AsyncParsableCommand, @unchecked Sendable {
}

do {
try await waitUntilServiceIsRunning(serviceName)
try await waitUntilServiceIsRunning(serviceName, activity: activity)
try await updateEnvironmentWithServiceIP(serviceName)
} catch {
// The wait gave up (idle/backstop timeout) but the `container run`
// subprocess is still streaming in the background. Tear it down so
// it doesn't leak past the failed wait: cancel the streaming task
// and stop the container, which also lets the subprocess exit.
runTask.cancel()
try? await stopOldStuff([serviceName], remove: false)
print(error)
}
}
Expand Down
109 changes: 109 additions & 0 deletions Tests/Container-Compose-StaticTests/ActivityClockTests.swift
Original file line number Diff line number Diff line change
@@ -0,0 +1,109 @@
//===----------------------------------------------------------------------===//
// Copyright © 2025 Morris Richman and the Container-Compose project authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//===----------------------------------------------------------------------===//

import Testing
import Foundation
@testable import ContainerComposeCore

/// Unit tests for `ActivityClock`, all driven by an injected fake time source
/// so that no test depends on the wall clock or on sleeping.
@Suite("ActivityClock")
struct ActivityClockTests {

    /// A manually advanced, lock-protected "current time" that can be handed
    /// to `ActivityClock` as its time source.
    private final class FakeClock: @unchecked Sendable {
        private let lock = NSLock()
        private var _now: Date

        /// - Parameter start: The initial value reported by `now`.
        init(_ start: Date) { _now = start }

        /// The fake's current time.
        var now: Date {
            lock.lock(); defer { lock.unlock() }
            return _now
        }

        /// Moves the fake's current time forward by `interval` seconds.
        func advance(by interval: TimeInterval) {
            lock.lock(); _now += interval; lock.unlock()
        }
    }

    /// A freshly created clock reports the time source's value at creation.
    @Test("seeds lastActivity from the injected clock")
    func seedsFromInjectedClock() {
        let start = Date(timeIntervalSince1970: 1_000)
        let fake = FakeClock(start)
        let clock = ActivityClock(now: { fake.now })

        #expect(clock.lastActivity == start)
    }

    /// `touch()` stores whatever the time source reports at the moment of the call.
    @Test("touch() captures the injected clock's current time")
    func touchCapturesInjectedTime() {
        let start = Date(timeIntervalSince1970: 1_000)
        let fake = FakeClock(start)
        let clock = ActivityClock(now: { fake.now })

        fake.advance(by: 5)
        clock.touch()

        #expect(clock.lastActivity == start.addingTimeInterval(5))
    }

    /// The wait logic compares `now - lastActivity` against the idle timeout.
    /// With an injected clock that window can be checked without real time: a
    /// `touch()` resets the elapsed-since-activity interval to zero, and time
    /// advancing without a touch grows it past the threshold.
    @Test("idle window reflects time since the last touch")
    func idleWindowReflectsTimeSinceTouch() {
        let start = Date(timeIntervalSince1970: 1_000)
        let fake = FakeClock(start)
        let clock = ActivityClock(now: { fake.now })

        // Active progress: a touch right before measuring keeps the window at zero.
        fake.advance(by: 100)
        clock.touch()
        #expect(fake.now.timeIntervalSince(clock.lastActivity) == 0)

        // Silence: time moves on without a touch, so the window grows.
        fake.advance(by: 31)
        #expect(fake.now.timeIntervalSince(clock.lastActivity) == 31)
    }

    /// `touch()` and `lastActivity` are called from different tasks in
    /// production (the streaming task writes while the wait loop reads).
    /// Hammer both concurrently against a fake time source that only moves
    /// forward and verify that:
    ///  - no reader ever observes the timestamp going backwards, and
    ///  - once every writer has finished, the stored timestamp is exactly the
    ///    fake's final time (each writer advances by one second per touch).
    @Test("touch() and lastActivity are safe under concurrent access")
    func concurrentAccessIsSafe() async {
        let start = Date(timeIntervalSince1970: 1_000)
        let fake = FakeClock(start)
        let clock = ActivityClock(now: { fake.now })
        let writers = 4
        let readers = 4
        let iterations = 10_000

        let readsWereMonotonic = await withTaskGroup(of: Bool.self, returning: Bool.self) { group in
            // Writers: move time forward, then record activity.
            for _ in 0..<writers {
                group.addTask {
                    for _ in 0..<iterations {
                        fake.advance(by: 1)
                        clock.touch()
                    }
                    return true
                }
            }
            // Readers: consecutive reads must never step back in time.
            for _ in 0..<readers {
                group.addTask {
                    var previous = clock.lastActivity
                    for _ in 0..<iterations {
                        let current = clock.lastActivity
                        if current < previous { return false }
                        previous = current
                    }
                    return true
                }
            }
            return await group.reduce(true) { $0 && $1 }
        }

        #expect(readsWereMonotonic)
        // The last touch overall happens after every advance, so it must have
        // captured the fake's final time.
        let expectedFinal = start.addingTimeInterval(TimeInterval(writers * iterations))
        #expect(clock.lastActivity == expectedFinal)
    }
}
Loading