Skip to content

Commit 93e44d8

Browse files
committed
优化
1 parent 60a5d57 commit 93e44d8

File tree

3 files changed

+18
-10
lines changed

gpt_server/model_backend/sglang_backend.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -169,7 +169,9 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
169169
usage = pre_usage
170170
pre_usage = usage
171171
try:
172-
reasoning_content = choices[0]["delta"]["reasoning_content"]
172+
reasoning_content = choices[0]["delta"].get(
173+
"reasoning_content", None
174+
)
173175
text = choices[0]["delta"]["content"]
174176
if text is None:
175177
text = ""

gpt_server/model_backend/vllm_backend.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@ def __init__(self, model_path, tokenizer: PreTrainedTokenizer) -> None:
8484
# ),
8585
prefix_caching_hash_algo="xxhash",
8686
structured_outputs_config=StructuredOutputsConfig(backend="xgrammar"),
87-
enforce_eager=True,
87+
enforce_eager=False,
8888
)
8989
self.engine = AsyncLLMEngine.from_engine_args(self.engine_args)
9090
models = OpenAIServingModels(
@@ -231,7 +231,9 @@ async def stream_chat(self, params: Dict[str, Any]) -> AsyncGenerator:
231231
reasoning_content = None
232232
try:
233233
text = choices[0]["delta"]["content"]
234-
reasoning_content = choices[0]["delta"]["reasoning_content"]
234+
reasoning_content = choices[0]["delta"].get(
235+
"reasoning_content", None
236+
)
235237
except Exception:
236238
logger.error(
237239
f"Error in processing chunk: {chunk_dict}",

gpt_server/model_worker/auto.py

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,16 @@
11
import json
2+
import traceback
23
from typing import List
4+
35
from fastchat.constants import ErrorCode, SERVER_ERROR_MSG
46
from loguru import logger
57
import torch
6-
import traceback
7-
from gpt_server.model_worker.base.model_worker_base import ModelWorkerBase
8+
from vllm.tool_parsers import ToolParserManager
9+
810
from gpt_server.model_handler.tool_parser import tool_parser
11+
from gpt_server.model_worker.base.model_worker_base import ModelWorkerBase
912
from gpt_server.model_worker.utils import guess_tool_parser_by_model
10-
11-
from vllm.tool_parsers import ToolParserManager
13+
from gpt_server.settings import get_model_config
1214

1315

1416
class AutoWorker(ModelWorkerBase):
@@ -38,14 +40,16 @@ def __init__(
3840
self.stop = [
3941
self.tokenizer.decode(skip_word) for skip_word in self.stop_words_ids
4042
]
41-
logger.warning(f"{model_names[0]} 停用词: {self.stop}")
42-
4343
tool_parser_name = guess_tool_parser_by_model(model_path)
44-
logger.warning(f"{model_names[0]} 工具解析器: {tool_parser_name}")
44+
model_config = get_model_config()
45+
4546
# from https://github.com/xorbitsai/inference/blob/c70ea74fa820a613f8d577047ef1818da20a96b3/xinference/model/llm/llm_family_modelscope.json
4647
self.tool_parser = ToolParserManager.get_tool_parser(tool_parser_name)(
4748
self.tokenizer
4849
)
50+
logger.warning(
51+
f"已启动模型: {model_names[0]} | 工具解析器: {tool_parser_name} | 推理解析器: {model_config.reasoning_parser}"
52+
)
4953

5054
async def generate_stream_gate(self, params):
5155
self.call_ct += 1

0 commit comments

Comments (0)