-
Notifications
You must be signed in to change notification settings - Fork 220
114 lines (110 loc) · 4.26 KB
/
gpu_tests.yaml
File metadata and controls
114 lines (110 loc) · 4.26 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
# Workflow header: display name, triggers, token permissions and concurrency.
name: GPU Tests

# Run on pushes to main, and on pull requests when they are opened, updated,
# reopened, or marked ready for review.
on:
  push:
    branches: [main]
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]

# Least-privilege GITHUB_TOKEN: read access to repository contents only.
# NOTE(review): confirm no step relies on a broader token scope before merging.
permissions:
  contents: read

# One concurrency group per workflow and ref. An in-progress run is cancelled
# by a newer one only for pull_request events; push runs are never cancelled.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}
jobs:
  # GPU-marked unit tests (tests/unit), run inside a CUDA-enabled PyTorch
  # container.
  unit-tests:
    name: Unit tests
    runs-on: vm
    container:
      image: pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel
      options: --gpus all
    timeout-minutes: 45
    # Always run on push; on pull requests, skip while the PR is a draft.
    if: github.event_name == 'push' || github.event.pull_request.draft == false
    steps:
      # Git is installed before checkout, and the workspace is registered as a
      # safe directory for git.
      - name: Install Git
        run: |
          apt-get update
          apt-get install -y git
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: true

      # The uv cache is keyed on the lockfile.
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-dependency-glob: "uv.lock"

      # --locked makes the install fail instead of updating uv.lock.
      - name: Install dependencies
        run: uv sync --all-extras --locked

      - name: Run unit tests
        env:
          USERNAME_CI: CI_RUNNER
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          # Set WANDB_MODE to online only if WANDB_API_KEY is available,
          # otherwise set it to offline. This allows running tests on forks
          # without WANDB_API_KEY.
          WANDB_MODE: ${{ secrets.WANDB_API_KEY && 'online' || 'offline' }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          PRIME_API_KEY: ${{ secrets.PRIME_API_KEY }}
          GITHUB_REF_NAME: ${{ github.ref_name }}
          GITHUB_HEAD_REF: ${{ github.head_ref }}
          PYTEST_OUTPUT_DIR: /tmp/outputs
        run: uv run pytest tests/unit -m gpu

  # GPU-marked integration tests, fanned out over a matrix so that each entry
  # chooses its own runner label and pytest arguments.
  integration-tests:
    name: Integration tests (${{ matrix.test }})
    runs-on: ${{ matrix.runner }}
    container:
      image: pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel
      options: --gpus all
    timeout-minutes: 60
    # Always run on push; on pull requests, skip while the PR is a draft.
    if: github.event_name == 'push' || github.event.pull_request.draft == false
    strategy:
      # A failing matrix entry does not cancel the remaining entries.
      fail-fast: false
      matrix:
        include:
          # "base" covers everything under tests/integration except the files
          # that have a dedicated matrix entry below; keep the --ignore list in
          # sync with those entries.
          - test: base
            runner: vm
            pytest_args: >-
              tests/integration -m gpu
              --ignore=tests/integration/test_rl_lora.py
              --ignore=tests/integration/test_rl_multi_run_lora.py
              --ignore=tests/integration/test_benchmark_regression.py
              --ignore=tests/integration/test_alphabet_sort.py
          - test: rl_lora
            runner: vm
            pytest_args: "tests/integration/test_rl_lora.py -m gpu"
          - test: alphabet_sort
            runner: vm
            pytest_args: "tests/integration/test_alphabet_sort.py -m gpu"
          - test: rl_multi_run
            runner: 4xa6000
            pytest_args: "tests/integration/test_rl_multi_run_lora.py -m gpu"
          - test: benchmark_regression
            runner: 4xa6000
            pytest_args: "tests/integration/test_benchmark_regression.py -m gpu"
    steps:
      # Git is installed before checkout, and the workspace is registered as a
      # safe directory for git.
      - name: Install Git
        run: |
          apt-get update
          apt-get install -y git
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: true

      # The uv cache is keyed on the lockfile.
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          enable-cache: true
          cache-dependency-glob: "uv.lock"

      # --locked makes the install fail instead of updating uv.lock.
      - name: Install dependencies
        run: uv sync --all-extras --locked

      - name: Run integration tests
        env:
          USERNAME_CI: CI_RUNNER
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          # Set WANDB_MODE to online only if WANDB_API_KEY is available,
          # otherwise set it to offline. This allows running tests on forks
          # without WANDB_API_KEY.
          WANDB_MODE: ${{ secrets.WANDB_API_KEY && 'online' || 'offline' }}
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          PRIME_API_KEY: ${{ secrets.PRIME_API_KEY }}
          PRIME_BASE_URL: ${{ secrets.PRIME_BASE_URL }}
          PRIME_TEAM_ID: ${{ secrets.PRIME_TEAM_ID }}
          GITHUB_REF_NAME: ${{ github.ref_name }}
          GITHUB_HEAD_REF: ${{ github.head_ref }}
          PYTEST_OUTPUT_DIR: /tmp/outputs
          VLLM_WORKER_MULTIPROC_METHOD: spawn
        # The arguments come from the static matrix above, not from user input.
        run: uv run pytest ${{ matrix.pytest_args }}