Skip to content

Commit 63a74f4

Browse files
containers: prepare environmentd and clusterd for distroless migration
Move bash entrypoint logic into Rust binaries so environmentd and clusterd can run in distroless containers without a shell: clusterd: - Auto-detect Kubernetes FQDN from /etc/hostname (replaces `hostname --fqdn`) - Auto-detect StatefulSet ordinal from HOSTNAME env var - Configure LD_PRELOAD for eatmydata (CI only, no-op in distroless) environmentd: - Configure LD_PRELOAD for eatmydata - Sleep forever after graceful exit (keeps container alive for debugging) Also add Dockerfile.distroless variants for both services that use the distroless-prod-base image and expect a static `ssh` binary to be copied in for SSH tunnel support. Part of SEC-236. Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
1 parent 268bcd1 commit 63a74f4

File tree

5 files changed

+144
-0
lines changed

5 files changed

+144
-0
lines changed

src/clusterd/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ clap = { version = "4.5.23", features = ["derive", "env"] }
1616
fail = { version = "0.5.1", features = ["failpoints"] }
1717
futures = "0.3.32"
1818
hyper = "1.4.1"
19+
libc = "0.2"
1920
hyper-util = "0.1.20"
2021
mz-alloc = { path = "../alloc" }
2122
mz-alloc-default = { path = "../alloc-default", optional = true }
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright Materialize, Inc. and contributors. All rights reserved.
2+
#
3+
# Use of this software is governed by the Business Source License
4+
# included in the LICENSE file at the root of this repository.
5+
#
6+
# As of the Change Date specified in that file, in accordance with
7+
# the Business Source License, use of this software will be governed
8+
# by the Apache License, Version 2.0.
9+
10+
# Distroless variant of the clusterd image. Requires the entrypoint
11+
# logic to be compiled into the binary (eatmydata, Kubernetes detection,
12+
# env var defaults) rather than handled by a bash script. Also requires
13+
# a static `ssh` binary for SSH tunnel support.
14+
15+
MZFROM distroless-prod-base
16+
17+
COPY clusterd /usr/local/bin/
18+
COPY ssh /usr/bin/
19+
20+
ENTRYPOINT ["/usr/local/bin/clusterd"]

src/clusterd/src/lib.rs

Lines changed: 84 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,45 @@ mod usage_metrics;
4747

4848
const BUILD_INFO: BuildInfo = build_info!();
4949

50+
/// Looks up the canonical (fully qualified) name for `short_hostname`.
///
/// Issues a `getaddrinfo` query with `AI_CANONNAME` requested, which is the
/// in-process equivalent of `hostname --fqdn`. The input is returned
/// unchanged whenever a canonical name cannot be obtained: the input
/// contains an interior NUL byte, the lookup fails or yields no entries, or
/// the first entry carries no canonical name.
fn resolve_fqdn(short_hostname: &str) -> String {
    use std::ffi::{CStr, CString};
    use std::ptr;

    // Every failure path degrades to the caller-supplied short name.
    let fallback = || short_hostname.to_string();

    let node = match CString::new(short_hostname) {
        Ok(node) => node,
        Err(_) => return fallback(),
    };

    // SAFETY: `addrinfo` is a plain C struct of integers and raw pointers,
    // for which the all-zero bit pattern is a valid value.
    let mut hints: libc::addrinfo = unsafe { std::mem::zeroed() };
    hints.ai_family = libc::AF_UNSPEC;
    hints.ai_flags = libc::AI_CANONNAME;

    let mut list: *mut libc::addrinfo = ptr::null_mut();

    // SAFETY: `node` is a valid NUL-terminated string, `hints` is fully
    // initialized, and `list` is a valid out-pointer; all outlive the call.
    let status = unsafe { libc::getaddrinfo(node.as_ptr(), ptr::null(), &hints, &mut list) };
    if status != 0 || list.is_null() {
        return fallback();
    }

    // SAFETY: the lookup succeeded and `list` is non-null, so it points at a
    // live `addrinfo` until `freeaddrinfo` is called below.
    let canonical = unsafe { (*list).ai_canonname };
    let name = if canonical.is_null() {
        fallback()
    } else {
        // SAFETY: a non-null `ai_canonname` is a NUL-terminated C string
        // owned by `list`, which has not been freed yet. The contents are
        // copied into an owned `String` before the list is released.
        unsafe { CStr::from_ptr(canonical) }
            .to_string_lossy()
            .into_owned()
    };

    // SAFETY: `list` came from a successful `getaddrinfo` call and is not
    // dereferenced again after this point.
    unsafe { libc::freeaddrinfo(list) };

    name
}
88+
5089
pub static VERSION: LazyLock<String> = LazyLock::new(|| BUILD_INFO.human_version(None));
5190

5291
/// Independent cluster server for Materialize.
@@ -171,6 +210,51 @@ struct Args {
171210
pub fn main() {
172211
mz_ore::panic::install_enhanced_handler();
173212

213+
// When running in Kubernetes, auto-detect the GRPC host from the pod's FQDN
214+
// and the process index from the StatefulSet ordinal. These are set as env
215+
// vars so that clap picks them up as defaults (they can still be overridden
216+
// via explicit env vars or CLI args).
217+
//
218+
// SAFETY: Called before any threads are spawned (main entry point, single
219+
// threaded), so modifying env vars is safe.
220+
if std::env::var("KUBERNETES_SERVICE_HOST").is_ok() {
221+
if std::env::var("CLUSTERD_GRPC_HOST").is_err() {
222+
// Resolve the pod's FQDN via DNS, equivalent to `hostname --fqdn`.
223+
// In Kubernetes, /etc/hostname only has the short name (e.g.,
224+
// "clusterd-0"), but GRPC validation needs the FQDN (e.g.,
225+
// "clusterd-0.clusterd.ns.svc.cluster.local"). We resolve the
226+
// short hostname through DNS to get the canonical name.
227+
//
228+
// This avoids shelling out to `hostname --fqdn` which isn't
229+
// available in distroless images.
230+
if let Ok(short) = std::fs::read_to_string("/etc/hostname") {
231+
let short = short.trim();
232+
if !short.is_empty() {
233+
let fqdn = resolve_fqdn(short);
234+
unsafe { std::env::set_var("CLUSTERD_GRPC_HOST", &fqdn) };
235+
}
236+
}
237+
}
238+
if std::env::var("CLUSTERD_PROCESS").is_err() {
239+
// Extract the ordinal index from the StatefulSet hostname
240+
// (e.g., "clusterd-0" → "0").
241+
if let Ok(hostname) = std::env::var("HOSTNAME") {
242+
if let Some(ordinal) = hostname.rsplit('-').next() {
243+
unsafe { std::env::set_var("CLUSTERD_PROCESS", ordinal) };
244+
}
245+
}
246+
}
247+
}
248+
249+
// Configure LD_PRELOAD for eatmydata if requested (CI performance
250+
// optimization). In distroless images libeatmydata.so is not available,
251+
// so this is a no-op.
252+
if std::env::var("MZ_EAT_MY_DATA").is_ok() {
253+
unsafe { std::env::set_var("LD_PRELOAD", "libeatmydata.so") };
254+
} else {
255+
unsafe { std::env::remove_var("LD_PRELOAD") };
256+
}
257+
174258
let args = cli::parse_args(CliConfig {
175259
env_prefix: Some("CLUSTERD_"),
176260
enable_version_flag: true,
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
# Copyright Materialize, Inc. and contributors. All rights reserved.
2+
#
3+
# Use of this software is governed by the Business Source License
4+
# included in the LICENSE file at the root of this repository.
5+
#
6+
# As of the Change Date specified in that file, in accordance with
7+
# the Business Source License, use of this software will be governed
8+
# by the Apache License, Version 2.0.
9+
10+
# Distroless variant of the environmentd image. Requires the entrypoint
11+
# logic to be compiled into the binary (eatmydata, sleep-on-exit) rather
12+
# than handled by a bash script. Also requires a static `ssh` binary for
13+
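# SSH tunnel support and a static `tini` for PID 1 init. NOTE(review): this file neither copies `tini` nor invokes it in ENTRYPOINT — confirm the base image provides it, or drop this requirement.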
14+
15+
MZFROM distroless-prod-base
16+
17+
COPY environmentd /usr/local/bin/
18+
COPY ssh /usr/bin/
19+
20+
ENTRYPOINT ["/usr/local/bin/environmentd"]

src/environmentd/src/environmentd/main.rs

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -640,13 +640,32 @@ fn aws_secrets_controller_key_alias(env_id: &EnvironmentId) -> String {
640640
}
641641

642642
pub fn main() {
643+
// Configure LD_PRELOAD for eatmydata if requested (CI performance
644+
// optimization). In distroless images libeatmydata.so is not available,
645+
// so this is a no-op.
646+
//
647+
// SAFETY: Called before any threads are spawned (main entry point, single
648+
// threaded), so modifying env vars is safe.
649+
if std::env::var("MZ_EAT_MY_DATA").is_ok() {
650+
unsafe { std::env::set_var("LD_PRELOAD", "libeatmydata.so") };
651+
} else {
652+
unsafe { std::env::remove_var("LD_PRELOAD") };
653+
}
654+
643655
let args = cli::parse_args(CliConfig {
644656
env_prefix: Some("MZ_"),
645657
enable_version_flag: true,
646658
});
647659
if let Err(err) = run(args) {
648660
panic!("environmentd: fatal: {}", err.display_with_causes());
649661
}
662+
// In the previous bash entrypoint, environmentd would sleep forever after
663+
// a graceful exit. This keeps the container alive for debugging. Replicate
664+
// that behavior here.
665+
eprintln!("environmentd exited gracefully; sleeping forever");
666+
loop {
667+
std::thread::sleep(std::time::Duration::from_secs(86400));
668+
}
650669
}
651670

652671
fn run(mut args: Args) -> Result<(), anyhow::Error> {

0 commit comments

Comments
 (0)