Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions docs/model_support.md
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ After these steps, the new model should be compatible with most FastChat feature
## API-Based Models
To support an API-based model, consider learning from the existing OpenAI example.
If the model is compatible with OpenAI APIs, then a configuration file is all that's needed without any additional code.
For custom protocols, implementation of a streaming generator in [fastchat/serve/api_provider.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/api_provider.py) is required, following the provided examples. Currently, FastChat is compatible with OpenAI, Anthropic, Google Vertex AI, Mistral, Nvidia NGC, YandexGPT and Reka.
For custom protocols, implementation of a streaming generator in [fastchat/serve/api_provider.py](https://github.com/lm-sys/FastChat/blob/main/fastchat/serve/api_provider.py) is required, following the provided examples. Currently, FastChat is compatible with OpenAI, Anthropic, Google Vertex AI, Mistral, Nvidia NGC, YandexGPT, Reka, and JAB.

### Steps to Launch a WebUI with an API Model
1. Specify the endpoint information in a JSON configuration file. For instance, create a file named `api_endpoints.json`:
Expand All @@ -126,7 +126,7 @@ For custom protocols, implementation of a streaming generator in [fastchat/serve
}
}
```
- "api_type" can be one of the following: openai, anthropic, gemini, mistral, yandexgpt or reka. For custom APIs, add a new type and implement it accordingly.
- "api_type" can be one of the following: openai, anthropic, gemini, mistral, yandexgpt, reka, or jab. For custom APIs, add a new type and implement it accordingly.
- "anony_only" indicates whether to display this model in anonymous mode only.
- "recommended_config" indicates the recommended generation parameters for temperature and top_p.
- "text-arena" indicates whether the model should be displayed in the Text Arena.
Expand Down
15 changes: 15 additions & 0 deletions fastchat/conversation.py
Original file line number Diff line number Diff line change
Expand Up @@ -608,6 +608,21 @@ def to_metagen_api_messages(self):
ret.append({"role": "ai", "text": msg})
return ret

def to_jab_api_messages(self):
    """Convert this conversation into the message-list format used by the JAB API.

    Returns a list of {"role": ..., "content": ...} dicts: the system message
    first (when non-empty), then alternating user/assistant turns. A trailing
    assistant slot holding None (no reply yet) is omitted.
    """
    messages = []
    if self.system_message != "":
        messages.append({"role": "system", "content": self.system_message})

    for turn, (_, content) in enumerate(self.messages[self.offset :]):
        if turn % 2 == 0:
            # Even positions after the offset are user turns.
            messages.append({"role": "user", "content": content})
        elif content is not None:
            # Odd positions are assistant turns; skip the pending (None) one.
            messages.append({"role": "assistant", "content": content})
    return messages

def save_new_images(self, has_csam_images=False, use_remote_storage=False):
import hashlib
from fastchat.constants import LOGDIR
Expand Down
11 changes: 11 additions & 0 deletions fastchat/model/model_adapter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2500,6 +2500,16 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
return get_conv_template("api_based_default")


class JABAdapter(BaseModelAdapter):
    """Model adapter for the API-based JAB models."""

    def match(self, model_path: str):
        # Case-insensitive substring match against the model path.
        lowered = model_path.lower()
        return "jab" in lowered

    def get_default_conv_template(self, model_path: str) -> Conversation:
        # JAB is served over an API, so the generic API template applies.
        return get_conv_template("api_based_default")


# Note: the registration order matters.
# The one registered earlier has a higher matching priority.
register_model_adapter(PeftModelAdapter)
Expand Down Expand Up @@ -2572,6 +2582,7 @@ def get_default_conv_template(self, model_path: str) -> Conversation:
register_model_adapter(Llama2ChangAdapter)
register_model_adapter(ZephyrAdapter)
register_model_adapter(NotusAdapter)
register_model_adapter(JABAdapter)
register_model_adapter(CatPPTAdapter)
register_model_adapter(TinyLlamaAdapter)
register_model_adapter(XwinLMAdapter)
Expand Down
7 changes: 7 additions & 0 deletions fastchat/model/model_registry.py
Original file line number Diff line number Diff line change
Expand Up @@ -1000,3 +1000,10 @@ def get_model_info(name: str) -> ModelInfo:
"https://huggingface.co/cllm",
"consistency-llm is a new generation of parallel decoder LLMs with fast generation speed.",
)

# Register UI metadata (display name, link, blurb) for the JAB model
# so it can be shown in the model list of the web demo.
register_model_info(
    ["jab-0.3"],
    "JAB",
    "https://github.com/Applied-General-Intelligence-Inc",
    "Just Another Bot prototype by AGI.",
)
57 changes: 57 additions & 0 deletions fastchat/serve/api_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,15 @@ def get_api_provider_stream_iter(
api_key=model_api_dict["api_key"],
extra_body=extra_body,
)
elif model_api_dict["api_type"] == "jab":
messages = conv.to_jab_api_messages()
stream_iter = jab_api_stream_iter(
model_name=model_api_dict["model_name"],
messages=messages,
api_base=model_api_dict["api_base"],
api_key=model_api_dict["api_key"],
conversation_id=state.conv_id,
)
else:
raise NotImplementedError()

Expand Down Expand Up @@ -1345,3 +1354,51 @@ def metagen_api_stream_iter(
"text": f"**API REQUEST ERROR** Reason: Unknown.",
"error_code": 1,
}


def jab_api_stream_iter(model_name, messages, api_base, api_key, conversation_id):
    """Query the JAB API and yield the reply as a simulated token stream.

    Args:
        model_name: Name of the JAB model to query.
        messages: Conversation in JAB format — a list of
            {"role": ..., "content": ...} dicts (see
            ``Conversation.to_jab_api_messages``).
        api_base: Full URL of the JAB chat-completion endpoint.
        api_key: API key, sent via the ``x-api-key`` header.
        conversation_id: Conversation identifier forwarded to the API.

    Yields:
        Dicts of ``{"text": ..., "error_code": ...}``; ``error_code`` is 0 on
        success and 1 on any failure (matching the other ``*_stream_iter``
        providers in this module).
    """
    import requests

    headers = {"Content-Type": "application/json", "x-api-key": api_key}

    payload = {
        "model": model_name,
        # Copy so later mutation of the caller's list cannot affect the payload.
        "messages": list(messages),
        "conversation_id": conversation_id,
    }

    logger.info(f"==== request ====\n{payload}")

    try:
        response = requests.post(api_base, json=payload, headers=headers)

        if response.status_code != 200:
            logger.error(
                f"Unexpected response ({response.status_code}): {response.text}"
            )
            yield {
                "text": f"**API REQUEST FAILED** Reason: {response.status_code}.",
                "error_code": 1,
            }
            # Bug fix: stop here. The original fell through and tried to parse
            # the failed response body, raising and masking the real error.
            return

        text = response.json()["choices"][0]["message"]["content"]
        pos = 0
        while pos < len(text):
            # Simulate token streaming: reveal 5 more characters per tick.
            pos += 5
            time.sleep(0.001)
            yield {
                "text": text[:pos],
                "error_code": 0,
            }
    except Exception as e:
        logger.error(f"==== error ====\n{e}")
        yield {
            "text": "**API REQUEST ERROR** Reason: Unknown.",
            "error_code": 1,
        }