rag-content/Containerfile-gpu at main · lightspeed-core/rag-content · GitHub

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
# GPU image: built on NVIDIA's CUDA 12.9.1 "devel" base image for UBI 9.
FROM nvcr.io/nvidia/cuda:12.9.1-devel-ubi9

# Install Python 3.12 (interpreter, headers, pip) together with the NVIDIA
# libraries listed below. Package docs are skipped and the dnf cache is
# cleaned in the same layer so neither ends up in the image.
RUN dnf install -y --nodocs --setopt=keepcache=0 --setopt=tsflags=nodocs \
        libcudnn9 \
        libcusparselt0 \
        libnccl \
        python3.12 \
        python3.12-devel \
        python3.12-pip \
    && dnf clean all

# Make the bare `python` command resolve to the 3.12 interpreter.
RUN ln --symbolic --force /usr/bin/python3.12 /usr/bin/python

# Install asciidoctor (distributed as a Ruby gem, hence rubygems).
# --no-document skips ri/rdoc generation, which is not needed inside the image;
# the dnf flags match the ones used for the Python install step.
# NOTE(review): the asciidoctor gem is not version-pinned; consider adding
# `-v <version>` for reproducible builds.
RUN dnf install -y --nodocs --setopt=keepcache=0 rubygems && \
    gem install --no-document asciidoctor && \
    dnf clean all
# Install the uv package manager at a pinned version.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip3.12 install --no-cache-dir uv==0.7.20

# All project files live under /rag-content; relative paths in the
# instructions that follow resolve against this directory.
WORKDIR /rag-content

# Top-level project files: Makefile, project metadata, lockfile and README.
COPY Makefile pyproject.toml uv.lock README.md /rag-content/
# Source tree, tests and helper scripts, each into its own directory.
COPY src /rag-content/src
COPY tests /rag-content/tests
COPY scripts /rag-content/scripts

# uv settings. These are ENV (not ARG), so they apply to the build steps below
# and also remain set in the running container:
#   UV_COMPILE_BYTECODE=0 - do not byte-compile packages when installing them.
#   UV_PYTHON_DOWNLOADS=0 - never download a managed Python; uv has to use an
#                           interpreter that is already present in the image.
# No torch backend is selected through uv here; the pytorch-cpu dependency is
# removed from pyproject.toml by a script in a following step.
ENV UV_COMPILE_BYTECODE=0
ENV UV_PYTHON_DOWNLOADS=0

# Remove the pytorch-cpu dependency from pyproject.toml, then regenerate
# uv.lock from the edited file and install the dependencies (but not the
# project itself).
#
# Everything runs in a single RUN so that the throwaway virtualenv holding
# tomlkit is created and deleted within the same layer and never persists in
# the image. `uv run --no-sync` stops uv from first syncing the project's
# still-unmodified dependency set into that throwaway environment.
# NOTE(review): assumes the script needs nothing beyond tomlkit and the
# standard library - confirm against scripts/remove_pytorch_cpu_pyproject.py.
RUN uv venv && \
    uv pip install tomlkit && \
    uv run --no-sync python ./scripts/remove_pytorch_cpu_pyproject.py && \
    rm -rf .venv uv.lock && \
    uv lock && \
    uv sync --locked --no-install-project

# Install the project itself into the environment prepared by the preceding
# `uv sync --no-install-project` step. The sources were already copied by the
# COPY instructions above; this RUN only keeps the project install in a layer
# separate from its dependencies. --locked makes uv fail rather than rewrite
# uv.lock if the lockfile is out of date.
RUN uv sync --locked

# Put the virtualenv's bin directory first on PATH so that `python` and other
# executables installed in /rag-content/.venv are found without activating it.
ENV PATH="/rag-content/.venv/bin:$PATH"

# Download the embeddings model at build time so it ships inside the image.
# EMBEDDING_MODEL is an ENV, so it also remains set in the running container.
# Presumably -l is the local target directory and -r the model repository id;
# verify against scripts/download_embeddings_model.py.
ENV EMBEDDING_MODEL=sentence-transformers/all-mpnet-base-v2
# The expansion is quoted so the shell passes the value as a single argument.
RUN python ./scripts/download_embeddings_model.py \
        -l ./embeddings_model \
        -r "${EMBEDDING_MODEL}"

# Reset the entrypoint: an empty exec-form ENTRYPOINT clears whatever
# entrypoint is inherited from the base image, so the container runs the
# command it is given directly.
ENTRYPOINT []

# The plain `description` key is kept for existing consumers; the
# org.opencontainers.image.description key is the OCI-standard equivalent.
LABEL description="Contains embedding model and dependencies needed to generate a vector database" \
      org.opencontainers.image.description="Contains embedding model and dependencies needed to generate a vector database"