# Configuration for using the DGX Spark as an inference server for Lex
# Command reference for checking GPU access and managing the Docker Compose
# services of the inference server.
# NOTE(review): these look like commands to run one at a time rather than as a
# top-to-bottom script -- `logs -f` and `docker stats` stream until interrupted,
# and the list ends with teardown commands. Confirm intended usage.
#
# --- GPU checks ---
# Show driver and GPU status on the host.
nvidia-smi
# Run nvidia-smi inside a throwaway container (--rm) with all GPUs exposed
# (--gpus all) to check that Docker can reach the GPUs.
# NOTE(review): confirm the `nvidia/cuda:12.0-base` tag exists for this
# platform; published tags usually carry a full version and OS suffix.
docker run --rm --gpus all nvidia/cuda:12.0-base nvidia-smi
#
# --- Setup ---
# Run download_models.py through uv.
# NOTE(review): presumably fetches the model weights; script not visible here.
uv run download_models.py
# Pull the images for the services defined in the Compose file.
docker compose pull
# Create and start the services in the background (detached).
docker compose up -d
#
# --- Monitoring ---
# Follow the logs of all services; streams until interrupted (Ctrl-C).
docker compose logs -f
# Follow the logs of the vllm-gemma-large service only.
docker compose logs -f vllm-gemma-large
# List the project's containers and their current state.
docker compose ps
#
# --- Managing a single service ---
# Restart the vllm-gemma-large service.
docker compose restart vllm-gemma-large
# Stop the vllm-gemma-large service without removing its container.
docker compose stop vllm-gemma-large
# Live resource usage of running containers; streams until interrupted.
docker stats
# Update one service: pull its image, then recreate it. The `&&` skips the
# `up` step if the pull fails.
docker compose pull vllm-gemma-large && docker compose up -d vllm-gemma-large
#
# --- Teardown ---
# Stop and remove the project's containers and networks.
docker compose down
# Same as above, but -v also removes the volumes, deleting any data stored in
# them. Destructive.
docker compose down -v