# Source: .github/workflows/gpu_tests.yaml (PrimeIntellect-ai/prime-rl, main branch)
# Runs the GPU-marked unit and integration test suites.
name: GPU Tests

# Triggers: pushes to main, plus pull-request activity. `ready_for_review` is
# listed so a run starts when a draft PR is marked ready (the jobs skip drafts).
on:
  push:
    branches: [main]
  pull_request:
    types: [opened, synchronize, reopened, ready_for_review]

# One concurrency group per workflow and ref. An in-progress run is cancelled
# by a newer one only for pull_request events; push-triggered runs are not.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref }}
  cancel-in-progress: ${{ github.event_name == 'pull_request' }}

# Least privilege for GITHUB_TOKEN: the jobs here check out the repository and
# run tests, so read access to contents is enough. Widen per job if a step
# ever needs to write (comments, statuses, packages, ...).
permissions:
  contents: read

jobs:
  # GPU-marked unit tests (`pytest tests/unit -m gpu`) run inside a CUDA PyTorch
  # container on the runner labelled `vm`.
  unit-tests:
    name: Unit tests
    # Pushes always run; pull requests run only once they are not drafts.
    if: github.event_name == 'push' || github.event.pull_request.draft == false
    runs-on: vm
    timeout-minutes: 45
    container:
      # `--gpus all` is handed to the container runtime when the job container
      # is created.
      options: '--gpus all'
      image: 'pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel'
    steps:
      # Runs before checkout; presumably the image ships without git (TODO
      # confirm). The last command registers the workspace as a safe git
      # directory.
      - name: Install Git
        run: |
          apt-get update
          apt-get install -y git
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: true

      # uv's cache is enabled and keyed on the lockfile.
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          cache-dependency-glob: 'uv.lock'
          enable-cache: true

      # `--locked` makes the sync fail rather than rewrite an out-of-date
      # uv.lock.
      - name: Install dependencies
        run: uv sync --all-extras --locked

      - name: Run unit tests
        run: uv run pytest tests/unit -m gpu
        env:
          USERNAME_CI: CI_RUNNER
          PYTEST_OUTPUT_DIR: /tmp/outputs
          GITHUB_REF_NAME: ${{ github.ref_name }}
          GITHUB_HEAD_REF: ${{ github.head_ref }}
          # Credentials for external services, taken from repository secrets.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          PRIME_API_KEY: ${{ secrets.PRIME_API_KEY }}
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          # W&B runs online only when WANDB_API_KEY is available and falls back
          # to offline otherwise, so the tests can still run on forks that do
          # not have the secret.
          WANDB_MODE: ${{ secrets.WANDB_API_KEY && 'online' || 'offline' }}

  # GPU-marked integration tests, fanned out over a matrix so each entry runs
  # as its own job on the runner label it names.
  integration-tests:
    name: Integration tests (${{ matrix.test }})
    # Pushes always run; pull requests run only once they are not drafts.
    if: github.event_name == 'push' || github.event.pull_request.draft == false
    runs-on: ${{ matrix.runner }}
    timeout-minutes: 60
    container:
      # `--gpus all` is handed to the container runtime when the job container
      # is created.
      options: '--gpus all'
      image: 'pytorch/pytorch:2.5.1-cuda12.4-cudnn9-devel'
    strategy:
      # Let every matrix entry finish even when another entry fails.
      fail-fast: false
      matrix:
        include:
          # `base` covers tests/integration except the files that have their
          # own entry below; those are excluded with --ignore.
          - test: base
            runner: vm
            pytest_args: >-
              tests/integration -m gpu
              --ignore=tests/integration/test_rl_lora.py
              --ignore=tests/integration/test_rl_multi_run_lora.py
              --ignore=tests/integration/test_benchmark_regression.py
              --ignore=tests/integration/test_alphabet_sort.py
          - test: rl_lora
            runner: vm
            pytest_args: 'tests/integration/test_rl_lora.py -m gpu'
          - test: alphabet_sort
            runner: vm
            pytest_args: 'tests/integration/test_alphabet_sort.py -m gpu'
          # NOTE(review): `4xa6000` presumably labels a 4-GPU machine - confirm.
          - test: rl_multi_run
            runner: 4xa6000
            pytest_args: 'tests/integration/test_rl_multi_run_lora.py -m gpu'
          - test: benchmark_regression
            runner: 4xa6000
            pytest_args: 'tests/integration/test_benchmark_regression.py -m gpu'
    steps:
      # Runs before checkout; presumably the image ships without git (TODO
      # confirm). The last command registers the workspace as a safe git
      # directory.
      - name: Install Git
        run: |
          apt-get update
          apt-get install -y git
          git config --global --add safe.directory "$GITHUB_WORKSPACE"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          submodules: true

      # uv's cache is enabled and keyed on the lockfile.
      - name: Install uv
        uses: astral-sh/setup-uv@v5
        with:
          cache-dependency-glob: 'uv.lock'
          enable-cache: true

      # `--locked` makes the sync fail rather than rewrite an out-of-date
      # uv.lock.
      - name: Install dependencies
        run: uv sync --all-extras --locked

      # The pytest arguments come from the selected matrix entry.
      - name: Run integration tests
        run: uv run pytest ${{ matrix.pytest_args }}
        env:
          USERNAME_CI: CI_RUNNER
          PYTEST_OUTPUT_DIR: /tmp/outputs
          # NOTE(review): presumably selects the multiprocessing start method
          # for vLLM workers - confirm against the vLLM documentation.
          VLLM_WORKER_MULTIPROC_METHOD: spawn
          GITHUB_REF_NAME: ${{ github.ref_name }}
          GITHUB_HEAD_REF: ${{ github.head_ref }}
          # Credentials and endpoints for external services, taken from
          # repository secrets.
          HF_TOKEN: ${{ secrets.HF_TOKEN }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          PRIME_API_KEY: ${{ secrets.PRIME_API_KEY }}
          PRIME_BASE_URL: ${{ secrets.PRIME_BASE_URL }}
          PRIME_TEAM_ID: ${{ secrets.PRIME_TEAM_ID }}
          WANDB_API_KEY: ${{ secrets.WANDB_API_KEY }}
          # W&B runs online only when WANDB_API_KEY is available and falls back
          # to offline otherwise, so the tests can still run on forks that do
          # not have the secret.
          WANDB_MODE: ${{ secrets.WANDB_API_KEY && 'online' || 'offline' }}