-
Notifications
You must be signed in to change notification settings - Fork 4
Expand file tree
/
Copy pathwait_for_llm.py
More file actions
70 lines (52 loc) · 1.28 KB
/
wait_for_llm.py
File metadata and controls
70 lines (52 loc) · 1.28 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import argparse
import sys
import time

import openai
# Two-turn chat transcript (system prompt + user question) that the script
# sends as the `messages` argument of its readiness probe request.
TEST_MESSAGE = [
    {"role": speaker, "content": text}
    for speaker, text in (
        ("system", "You are a helpful assistant."),
        ("user", "Are you ready?"),
    )
]
def wait_until_ready(probe, retry_on, poll_interval=5.0, timeout=None):
    """Call *probe* repeatedly until it returns without raising.

    Args:
        probe: Zero-argument callable performing one readiness check. Any
            return value is ignored; returning normally means "ready".
        retry_on: Exception class (or tuple of classes) that mean "not ready
            yet". Any other exception propagates immediately so that
            programming errors are not hidden behind an endless retry loop.
        poll_interval: Non-negative number of seconds to sleep between
            attempts.
        timeout: Maximum number of seconds to keep retrying, or ``None`` to
            retry forever.

    Returns:
        The number of attempts made, including the successful one.

    Raises:
        TimeoutError: If *timeout* elapsed before *probe* succeeded.
    """
    deadline = None if timeout is None else time.monotonic() + timeout
    attempts = 0
    last_reason = None
    while True:
        attempts += 1
        try:
            probe()
        except retry_on as err:
            reason = f"{type(err).__name__}: {err}"
        else:
            return attempts

        # Report the failure, but only when the reason changes, so a long
        # start-up does not flood the log with identical lines.
        if reason != last_reason:
            print(
                f"vLLM not ready yet ({reason}); "
                f"retrying every {poll_interval}s.",
                file=sys.stderr,
                flush=True,
            )
            last_reason = reason

        if deadline is None:
            time.sleep(poll_interval)
            continue

        remaining = deadline - time.monotonic()
        if remaining <= 0:
            raise TimeoutError(
                f"vLLM was not ready after {timeout}s "
                f"({attempts} attempts); last error: {reason}"
            )
        # Never sleep past the deadline.
        time.sleep(min(poll_interval, remaining))


def main():
    """Parse CLI options and block until the LLM endpoint answers a chat request.

    Exits with a non-zero status (message on stderr) if ``--timeout`` is set
    and elapses before the endpoint responds.
    """
    parser = argparse.ArgumentParser(
        description="Wait until an OpenAI-compatible LLM endpoint is ready.",
    )
    parser.add_argument(
        "--model_name",
        type=str,
        default="meta-llama/Llama-3.3-70B-Instruct",
    )
    parser.add_argument(
        "--api_key",
        type=str,
        default="token-abc123",
    )
    parser.add_argument(
        "--llm_endpoint",
        type=str,
        default="http://localhost:8000/v1",
    )
    parser.add_argument(
        "--poll_interval",
        type=float,
        default=5.0,
        help="Seconds to wait between readiness checks.",
    )
    parser.add_argument(
        "--timeout",
        type=float,
        default=None,
        help="Give up after this many seconds (default: wait forever).",
    )
    args = parser.parse_args()

    client = openai.OpenAI(
        api_key=args.api_key,
        base_url=args.llm_endpoint,
    )

    def probe():
        # A tiny completion request; the response content is irrelevant,
        # only whether the call succeeds.
        client.chat.completions.create(
            messages=TEST_MESSAGE,
            model=args.model_name,
            max_tokens=32,
        )

    try:
        wait_until_ready(
            probe,
            # Only errors raised by the openai client are treated as
            # "server not up yet"; anything else is a bug and should surface.
            retry_on=(openai.OpenAIError,),
            poll_interval=args.poll_interval,
            timeout=args.timeout,
        )
    except TimeoutError as err:
        raise SystemExit(str(err)) from err
    print("vLLM started successfully.")


if __name__ == "__main__":
    main()