-
Notifications
You must be signed in to change notification settings - Fork 239
Expand file tree
/
Copy pathDockerfile.cuda
More file actions
121 lines (99 loc) · 4.07 KB
/
Dockerfile.cuda
File metadata and controls
121 lines (99 loc) · 4.07 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
# Inspired by https://github.com/astral-sh/uv-docker-example/blob/main/multistage.Dockerfile
# Builder stage: CUDA development image used as the multi-arch base (amd64 + arm64).
FROM nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04 AS builder

LABEL maintainer="prime intellect" \
      repository="prime-rl"

# Record en_US.UTF-8 as the default locale by appending it to /etc/environment.
RUN printf '%s\n' 'LC_ALL=en_US.UTF-8' >> /etc/environment

# Point CUDA_HOME at the toolkit and append its bin directory to PATH.
ENV CUDA_HOME=/usr/local/cuda \
    PATH=$PATH:/usr/local/cuda/bin
# Install build tooling and Python 3.12 (from the deadsnakes PPA); ninja is
# included for fast CUDA kernel compilation.
#
# DEBIAN_FRONTEND is declared as a build ARG only: it is visible to every RUN in
# this stage without being baked into the image environment.
ARG DEBIAN_FRONTEND=noninteractive
ENV TZ=Etc/UTC
# Notes on the flags used below:
#   * --force-yes is intentionally absent: it is deprecated in apt and disables
#     package authentication checks; -y is sufficient for unattended installs.
#   * add-apt-repository gets -y so it can never block waiting for confirmation.
#   * python3/python are re-pointed at python3.12 only after add-apt-repository
#     has run, so the distro tooling still sees the stock interpreter.
#   * apt lists and temp files are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        curl \
        git \
        ninja-build \
        software-properties-common \
        sudo \
    && add-apt-repository -y ppa:deadsnakes/ppa \
    && apt-get update \
    && apt-get install -y --no-install-recommends \
        python3.12 \
        python3.12-dev \
        python3.12-venv \
    && ln -sf /usr/bin/python3.12 /usr/bin/python3 \
    && ln -sf /usr/bin/python3.12 /usr/bin/python \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Fetch the uv install script. NOTE(review): the URL carries no version or
# checksum, so each uncached build picks up whatever is currently served.
ADD https://astral.sh/uv/install.sh /uv-installer.sh

# Run the installer with /usr/local/bin as the install dir (a location
# accessible to non-root users) and INSTALLER_NO_MODIFY_PATH=1, then delete it.
RUN export INSTALLER_NO_MODIFY_PATH=1 UV_INSTALL_DIR="/usr/local/bin" \
    && sh /uv-installer.sh \
    && rm /uv-installer.sh

# Keep the install dir on PATH and place uv's managed Pythons and cache under
# /usr/local/share/uv.
ENV PATH="/usr/local/bin:$PATH" \
    UV_PYTHON_INSTALL_DIR="/usr/local/share/uv/python" \
    UV_CACHE_DIR="/usr/local/share/uv/cache"
# Install the project and its locked dependencies with uv.
# Only the paths the build needs are copied, rather than the whole context.
WORKDIR /app
# Compile bytecode at install time, and copy packages out of the cache instead
# of linking to it (the cache is a build-time mount, not part of the image).
ENV UV_COMPILE_BYTECODE=1 UV_LINK_MODE=copy
COPY pyproject.toml /app/pyproject.toml
COPY uv.lock /app/uv.lock
COPY README.md /app/README.md
COPY src/ /app/src/
COPY configs /app/configs
COPY examples /app/examples
COPY benchmarks/scripts /app/benchmarks/scripts
# The cache mount must sit on the directory uv actually uses. UV_CACHE_DIR is
# set to /usr/local/share/uv/cache earlier in this stage, so the mount targets
# that path; mounting anywhere else leaves the cache unused between builds and
# writes the downloaded/built artifacts into this image layer instead.
RUN --mount=type=cache,target=/usr/local/share/uv/cache \
    uv sync --extra flash-attn --extra flash-attn-3 --extra flash-attn-cute --extra envs --group mamba-ssm --locked --no-dev
# arm64 only: run the post-install script (build flash-attn from source, fix
# namespace conflicts, apply workarounds). Other architectures skip this step.
# TARGETARCH is the target-architecture build argument; it has to be declared
# in the stage before a RUN can read it.
ARG TARGETARCH
COPY scripts/docker-arm64-post-install.sh /app/scripts/docker-arm64-post-install.sh
RUN case "$TARGETARCH" in \
        arm64) /app/scripts/docker-arm64-post-install.sh ;; \
    esac
# Runtime stage: slim Python 3.12 image plus the system tools used inside the
# container (compilers, SSH server, network utilities, InfiniBand verbs libs).
FROM python:3.12-slim AS runtime
# --force-yes is intentionally absent: it is deprecated in apt and disables
# package authentication checks; -y is sufficient for unattended installs.
# Packages are listed one per line in alphabetical order to keep diffs small,
# and apt lists/temp files are removed in the same layer that created them.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        build-essential \
        clang \
        curl \
        git-lfs \
        gpg \
        ibverbs-providers \
        iperf \
        iputils-ping \
        libibverbs1 \
        net-tools \
        openssh-server \
        sudo \
        tmux \
        vim \
        wget \
    && apt-get clean \
    && rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/*
# Create the "appuser" account. Its UID and GID default to 1000 and can be
# overridden with --build-arg USER_ID=... / GROUP_ID=...
ARG USER_ID=1000
ARG GROUP_ID=1000
# The account gets a home directory and bash as its login shell, joins the sudo
# group, and is granted passwordless sudo through a rule appended to /etc/sudoers.
RUN groupadd -g ${GROUP_ID} appuser \
    && useradd -u ${USER_ID} -g appuser -m -s /bin/bash appuser \
    && usermod --append --groups sudo appuser \
    && printf '%s\n' 'appuser ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers
# Install uv into /usr/local/bin for development use inside the container.
# NOTE(review): the installer URL carries no version or checksum.
ADD https://astral.sh/uv/install.sh /uv-installer.sh
RUN export INSTALLER_NO_MODIFY_PATH=1 UV_INSTALL_DIR="/usr/local/bin" \
    && sh /uv-installer.sh \
    && rm /uv-installer.sh

# From here on, instructions run as appuser with /app as the working directory.
USER appuser
ENV PATH="/usr/local/bin:$PATH"
WORKDIR /app
# Copy the application tree (including /app/.venv) from the builder stage,
# owned by appuser.
COPY --from=builder --chown=appuser:appuser /app /app

# Install the entrypoint script with its executable bit set at copy time, which
# avoids a separate chmod layer.
COPY --chown=appuser:appuser --chmod=755 scripts/docker-entrypoint.sh /app/docker-entrypoint.sh

# Re-point the virtualenv's interpreter links at this image's interpreter,
# /usr/local/bin/python. The links copied from the builder presumably refer to
# the builder's interpreter location, which need not exist here - TODO confirm.
# ln -sfn replaces each link in place (and creates it if missing), so all three
# are handled in a single layer; any failure aborts the build.
RUN for name in python python3 python3.12; do \
        ln -sfn /usr/local/bin/python "/app/.venv/bin/${name}" || exit 1; \
    done
# Put the virtualenv's executables ahead of everything else on PATH.
ENV PATH="/app/.venv/bin:$PATH"

# HuggingFace Hub timeouts (defaults are 10s, which causes issues on slow networks).
ENV HF_HUB_ETAG_TIMEOUT=500 \
    HF_HUB_DOWNLOAD_TIMEOUT=300

# The entrypoint script handles setup (ulimit, etc.); the default command is
# "sleep infinity" so the container stays up under Kubernetes until a different
# command is supplied.
ENTRYPOINT ["/app/docker-entrypoint.sh"]
CMD ["sleep", "infinity"]