Skip to content

Commit 032e160

Browse files
committed
use actions/cache to retrieve the cache
1 parent 12d074f commit 032e160

5 files changed

Lines changed: 134 additions & 10 deletions

File tree

.github/actions/build-container/action.yml

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,14 @@ inputs:
5454
description: "URL of the Bazel remote cache to use for building the image"
5555
required: true
5656
default: ""
57+
# Opt-in switches for the actions/cache-backed Bazel caches. Both default to
# off; the cache steps of this action only run when the value is the string
# 'true'.
ENABLE_BAZEL_DISK_CACHE:
  description: 'Enable Bazel disk cache via actions/cache'
  default: 'false'
  required: false
ENABLE_BAZEL_REPO_CACHE:
  description: 'Enable Bazel repository cache via actions/cache'
  default: 'false'
  required: false
5765

5866
outputs:
5967
DOCKER_TAG_MEALKIT:
@@ -106,6 +114,32 @@ runs:
106114
mv version.py .github/container/nsys_jax/nsys_jax/
107115
cat .github/container/nsys_jax/nsys_jax/version.py
108116
117+
# BAZEL CACHE RESTORE
# Fetch a previously saved Bazel disk cache for this architecture onto the
# runner. The primary key embeds the current run id, so a restore is expected
# to come from the architecture-wide restore-keys prefix.
- name: Restore Bazel disk cache
  uses: actions/cache/restore@v4
  if: ${{ inputs.ENABLE_BAZEL_DISK_CACHE == 'true' }}
  with:
    key: bazel-disk-cache-${{ inputs.ARCHITECTURE }}-${{ github.run_id }}
    restore-keys: |
      bazel-disk-cache-${{ inputs.ARCHITECTURE }}-
    path: /tmp/bazel-disk-cache
126+
127+
# Fetch a previously saved Bazel repository cache for this architecture.
# NOTE(review): in the visible hunks only bazel-disk-seed is handed to docker
# as a build context; nothing appears to feed /tmp/bazel-repo-cache into a
# build. Confirm that this restored directory is actually consumed.
- name: Restore Bazel repo cache
  uses: actions/cache/restore@v4
  if: ${{ inputs.ENABLE_BAZEL_REPO_CACHE == 'true' }}
  with:
    key: bazel-repo-cache-${{ inputs.ARCHITECTURE }}-${{ github.run_id }}
    restore-keys: |
      bazel-repo-cache-${{ inputs.ARCHITECTURE }}-
    path: /tmp/bazel-repo-cache
135+
136+
# Runs unconditionally: the seed directories have to exist even when nothing
# was restored, so that the --build-context paths passed to docker are valid.
# Creating an already-existing directory is a no-op.
- name: Prepare Bazel cache seed directories
  shell: bash
  run: mkdir -p /tmp/bazel-disk-cache /tmp/bazel-repo-cache
142+
109143
# MEALKIT BUILD
110144
- name: Set docker metadata - mealkit
111145
id: mealkit-metadata
@@ -134,6 +168,8 @@ runs:
134168
ssh: default
135169
secret-files: |
136170
"SSH_KNOWN_HOSTS=${{ steps.setup-ssh.outputs.known-hosts-file }}"
171+
build-contexts: |
172+
bazel-disk-seed=/tmp/bazel-disk-cache
137173
build-args: |
138174
BASE_IMAGE=${{ inputs.BASE_IMAGE }}
139175
BAZEL_CACHE=${{ inputs.bazel-remote-cache-url }}
@@ -173,6 +209,53 @@ runs:
173209
BUILD_DATE=${{ inputs.BUILD_DATE }}
174210
${{ inputs.EXTRA_BUILD_ARGS }}
175211
212+
# BAZEL CACHE EXPORT
# Build only the bazel-disk-export target of the Dockerfile and write its
# filesystem to a local directory on the runner instead of pushing an image.
- name: Export Bazel disk cache
  uses: docker/build-push-action@v5
  if: ${{ inputs.ENABLE_BAZEL_DISK_CACHE == 'true' }}
  with:
    file: ${{ inputs.DOCKERFILE }}
    context: ${{ inputs.DOCKER_CONTEXT }}
    target: bazel-disk-export
    platforms: linux/${{ inputs.ARCHITECTURE }}
    push: false
    outputs: type=local,dest=/tmp/bazel-disk-cache-new
    build-contexts: |
      bazel-disk-seed=/tmp/bazel-disk-cache
    build-args: |
      BASE_IMAGE=${{ inputs.BASE_IMAGE }}
      BUILD_DATE=${{ inputs.BUILD_DATE }}
      ${{ inputs.EXTRA_BUILD_ARGS }}
229+
230+
# actions/cache ties a cache entry to the path it was saved from (the path is
# part of the cache version), and the restore step of this action looks up
# /tmp/bazel-disk-cache. Saving from /tmp/bazel-disk-cache-new would create
# entries that the restore step can never match, so move the fresh export onto
# the restore path first.
- name: Promote exported Bazel disk cache
  if: inputs.ENABLE_BAZEL_DISK_CACHE == 'true'
  shell: bash
  run: |
    rm -rf /tmp/bazel-disk-cache
    mv /tmp/bazel-disk-cache-new /tmp/bazel-disk-cache

# Persist the promoted directory under a per-run key; later runs pick it up
# through the architecture-wide restore-keys prefix.
- name: Save Bazel disk cache
  if: inputs.ENABLE_BAZEL_DISK_CACHE == 'true'
  uses: actions/cache/save@v4
  with:
    path: /tmp/bazel-disk-cache
    key: bazel-disk-cache-${{ inputs.ARCHITECTURE }}-${{ github.run_id }}
237+
# Build only the bazel-repo-export target of the Dockerfile and write its
# filesystem to a local directory on the runner instead of pushing an image.
- name: Export Bazel repo cache
  uses: docker/build-push-action@v5
  if: ${{ inputs.ENABLE_BAZEL_REPO_CACHE == 'true' }}
  with:
    file: ${{ inputs.DOCKERFILE }}
    context: ${{ inputs.DOCKER_CONTEXT }}
    target: bazel-repo-export
    platforms: linux/${{ inputs.ARCHITECTURE }}
    push: false
    outputs: type=local,dest=/tmp/bazel-repo-cache-new
    build-args: |
      BASE_IMAGE=${{ inputs.BASE_IMAGE }}
      BUILD_DATE=${{ inputs.BUILD_DATE }}
      ${{ inputs.EXTRA_BUILD_ARGS }}
251+
252+
# actions/cache ties a cache entry to the path it was saved from (the path is
# part of the cache version), and the restore step of this action looks up
# /tmp/bazel-repo-cache. Saving from /tmp/bazel-repo-cache-new would create
# entries that the restore step can never match, so move the fresh export onto
# the restore path first.
- name: Promote exported Bazel repo cache
  if: inputs.ENABLE_BAZEL_REPO_CACHE == 'true'
  shell: bash
  run: |
    rm -rf /tmp/bazel-repo-cache
    mv /tmp/bazel-repo-cache-new /tmp/bazel-repo-cache

# Persist the promoted directory under a per-run key; later runs pick it up
# through the architecture-wide restore-keys prefix.
- name: Save Bazel repo cache
  if: inputs.ENABLE_BAZEL_REPO_CACHE == 'true'
  uses: actions/cache/save@v4
  with:
    path: /tmp/bazel-repo-cache
    key: bazel-repo-cache-${{ inputs.ARCHITECTURE }}-${{ github.run_id }}
258+
176259
# SITREP GENERATION
177260
- name: Generate sitrep
178261
if: "!cancelled()"

.github/container/Dockerfile.jax

Lines changed: 43 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -19,14 +19,24 @@ ARG SRC_PATH_TRANSFORMER_ENGINE=/opt/transformer-engine
1919
ARG GIT_USER_NAME="JAX Toolbox"
2020
ARG GIT_USER_EMAIL=jax@nvidia.com
2121

22-
ARG BAZEL_CACHE=/tmp
22+
ARG BAZEL_CACHE=/cache/bazel-disk
2323
ARG BUILD_DATE
2424

25+
###############################################################################
## Bazel disk cache seed (replaceable via --build-context)
###############################################################################

# Placeholder stage that is empty by default (FROM scratch). A caller that has
# a previous disk cache on the runner, e.g. restored by actions/cache into
# /tmp/bazel-disk-cache, swaps this stage out with
#   --build-context bazel-disk-seed=/tmp/bazel-disk-cache
# so the builder stage can copy that content into its Bazel disk cache.
FROM scratch AS bazel-disk-seed
33+
2534
###############################################################################
2635
## Build JAX
2736
###############################################################################
2837

2938
FROM ${BASE_IMAGE} AS builder
39+
ARG TARGETARCH
3040
ARG URLREF_JAX
3141
ARG URLREF_TRANSFORMER_ENGINE
3242
ARG URLREF_XLA
@@ -54,9 +64,14 @@ RUN ARCH="$(dpkg --print-architecture)" && \
5464
chmod +x /usr/local/bin/bazel
5565
# Populate ${BUILD_PATH_JAXLIB} with editable wheels; --no-install because
5666
# (a) this is the builder stage, and (b) pip-finalize.sh does the install
57-
RUN mkdir -p /builder/extra-targets/{bin,python} && \
67+
RUN --mount=type=cache,id=bazel-disk-${TARGETARCH},target=/cache/bazel-disk,sharing=locked \
68+
--mount=type=cache,id=bazel-repo-${TARGETARCH},target=/cache/bazel-repo,sharing=locked \
69+
--mount=type=bind,from=bazel-disk-seed,source=.,target=/tmp/bazel-disk-seed,readonly \
70+
cp -a /tmp/bazel-disk-seed/. /cache/bazel-disk/ 2>/dev/null || true && \
71+
mkdir -p /builder/extra-targets/{bin,python} && \
5872
build-jax.sh \
5973
--bazel-cache ${BAZEL_CACHE} \
74+
--build-param --bazel_options=--repository_cache=/cache/bazel-repo \
6075
--build-path-jaxlib ${BUILD_PATH_JAXLIB} \
6176
--extra-targets "${EXTRA_BAZEL_TARGETS}" \
6277
--extra-target-dest /builder/extra-targets \
@@ -148,3 +163,29 @@ RUN install-nsys-jax.sh ${SRC_PATH_NSYS_JAX}
148163

149164
FROM mealkit AS final
150165
RUN pip-finalize.sh
166+
167+
###############################################################################
## Bazel cache export stages (used by CI to persist caches via actions/cache)
###############################################################################

# Copy the content of the bazel-disk cache mount into a regular image layer,
# because cache mounts themselves are not part of any stage's filesystem.
# BUILD_DATE is declared ahead of the RUN with the intent of forcing it to
# re-execute so the snapshot reflects the current run.
# NOTE(review): a rebuild with an unchanged BUILD_DATE may still be served from
# the layer cache - confirm this is acceptable.
FROM ${BASE_IMAGE} AS bazel-disk-snapshot
ARG BUILD_DATE
ARG TARGETARCH
RUN --mount=type=cache,id=bazel-disk-${TARGETARCH},target=/cache/bazel-disk,sharing=locked,readonly \
    mkdir -p /bazel-disk-snapshot \
 && cp -rp /cache/bazel-disk/. /bazel-disk-snapshot/

# Stage containing nothing but the snapshot, placed at the filesystem root.
FROM scratch AS bazel-disk-export
COPY --from=bazel-disk-snapshot /bazel-disk-snapshot /
182+
183+
# Copy the content of the bazel-repo cache mount into a regular image layer.
# BUILD_DATE is declared ahead of the RUN with the intent of forcing it to
# re-execute on each run.
# NOTE(review): a rebuild with an unchanged BUILD_DATE may still be served from
# the layer cache - confirm this is acceptable.
FROM ${BASE_IMAGE} AS bazel-repo-snapshot
ARG BUILD_DATE
ARG TARGETARCH
RUN --mount=type=cache,id=bazel-repo-${TARGETARCH},target=/cache/bazel-repo,sharing=locked,readonly \
    mkdir -p /bazel-repo-snapshot \
 && cp -rp /cache/bazel-repo/. /bazel-repo-snapshot/

# Stage containing nothing but the snapshot, placed at the filesystem root.
FROM scratch AS bazel-repo-export
COPY --from=bazel-repo-snapshot /bazel-repo-snapshot /

.github/workflows/_build_base.yaml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ permissions:
5858
jobs:
5959

6060
build-base:
61-
runs-on: [self-hosted, "${{ inputs.ARCHITECTURE }}", small]
61+
runs-on: ${{ inputs.ARCHITECTURE == 'amd64' && 'linux-amd64-cpu32m' || 'linux-arm64-cpu32m' }}
6262
env:
6363
BADGE_FILENAME_FULL: ${{ inputs.BADGE_FILENAME }}-${{ inputs.ARCHITECTURE }}.json
6464
outputs:
@@ -137,7 +137,7 @@ jobs:
137137
BUILD_DATE=${{ inputs.BUILD_DATE }}
138138
JAX_TOOLBOX_REF=${{ github.head_ref || github.sha }}
139139
${{ inputs.BASE_IMAGE != 'latest' && format('BASE_IMAGE={0}', inputs.BASE_IMAGE) || '' }}
140-
140+
141141
- name: Generate sitrep
142142
if: "!cancelled()"
143143
shell: bash -x -e {0}

.github/workflows/_ci.yaml

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,8 @@ jobs:
8383
ssh-known-hosts: ${{ vars.SSH_KNOWN_HOSTS }}
8484
github-token: ${{ secrets.GITHUB_TOKEN }}
8585
bazel-remote-cache-url: ${{ vars.BAZEL_REMOTE_CACHE_URL }}
86+
ENABLE_BAZEL_DISK_CACHE: 'true'
87+
ENABLE_BAZEL_REPO_CACHE: 'true'
8688
EXTRA_BUILD_ARGS: |
8789
URLREF_JAX=${{ fromJson(inputs.SOURCE_URLREFS).JAX }}
8890
URLREF_XLA=${{ fromJson(inputs.SOURCE_URLREFS).XLA }}

.github/workflows/ci.yaml

Lines changed: 4 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -4,12 +4,10 @@ on:
44
schedule:
55
- cron: '30 9 * * *' # 09:30 UTC daily, i.e. 01:30 AM Pacific Standard Time
66
- cron: '0 0 * * 6' # midnight UTC every Saturday, for scale-training
7-
pull_request:
8-
types:
9-
- opened
10-
- reopened
11-
- ready_for_review
12-
- synchronize
7+
push:
8+
# we need this to allow nv-gha-runners to run
9+
branches:
10+
- "**"
1311
paths-ignore:
1412
- '**.md'
1513
- '.github/triage/**'

0 commit comments

Comments
 (0)