Skip to content

Commit aa89c61

Browse files
authored
Pub master (#2235)
* improve image gen plugin * Squashed commit of the following: commit d8ae942 Author: binaryhusky <[email protected]> Date: Sun Jan 25 12:04:55 2026 +0000 Update .gitignore to include wandb and remove OpenJudge subproject commit 403fc80 Author: lbykkkk <[email protected]> Date: Fri Jan 9 10:19:40 2026 +0000 Add project documentation with MkDocs setup
1 parent 0aa0472 commit aa89c61

File tree

80 files changed

+20219
-26
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

80 files changed

+20219
-26
lines changed

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,3 +165,4 @@ experimental_mods
165165
search_results
166166
gg.docx
167167
unstructured_reader.py
168+
wandb

config.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -369,6 +369,13 @@
369369
AUTO_CONTEXT_MAX_CLIP_RATIO = [0.80, 0.60, 0.45, 0.25, 0.20, 0.18, 0.16, 0.14, 0.12, 0.10, 0.08, 0.07, 0.06, 0.05, 0.04, 0.03, 0.02, 0.01]
370370

371371

372+
373+
# DO NOT USE, UNDER DEVELOPMENT
374+
REROUTE_ALL_TO_ONE_API = False
375+
ONE_API_URL = ""
376+
ONE_API_KEY = "$API_KEY"
377+
378+
372379
"""
373380
--------------- 配置关联关系说明 ---------------
374381

crazy_functions/Image_Generate.py

Lines changed: 206 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,14 @@
1-
from toolbox import CatchException, update_ui, get_conf, select_api_key, get_log_folder
1+
import requests
2+
import base64
3+
import json
4+
import time
5+
import os
6+
from request_llms.bridge_chatgpt import make_multimodal_input
7+
from toolbox import CatchException, have_any_recent_upload_image_files, update_ui, get_conf, select_api_key, get_log_folder, update_ui_latest_msg
28
from crazy_functions.multi_stage.multi_stage_utils import GptAcademicState
3-
9+
from loguru import logger
410

511
def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", quality=None, style=None):
6-
import requests, json, time, os
712
from request_llms.bridge_all import model_info
813

914
proxies = get_conf('proxies')
@@ -47,7 +52,6 @@ def gen_image(llm_kwargs, prompt, resolution="1024x1024", model="dall-e-2", qual
4752

4853

4954
def edit_image(llm_kwargs, prompt, image_path, resolution="1024x1024", model="dall-e-2"):
50-
import requests, json, time, os
5155
from request_llms.bridge_all import model_info
5256

5357
proxies = get_conf('proxies')
@@ -106,7 +110,7 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
106110
history = [] # 清空历史,以免输入溢出
107111
if prompt.strip() == "":
108112
chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。"))
109-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新
113+
yield from update_ui(chatbot=chatbot, history=history)
110114
return
111115
chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 使用前请切换模型到GPT系列。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
112116
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 由于请求gpt需要一段时间,我们先及时地做一次界面更新
@@ -119,15 +123,15 @@ def 图片生成_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
119123
f'本地文件地址: <br/>`{image_path}`<br/>'+
120124
f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
121125
])
122-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新
126+
yield from update_ui(chatbot=chatbot, history=history)
123127

124128

125129
@CatchException
126130
def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
127131
history = [] # 清空历史,以免输入溢出
128132
if prompt.strip() == "":
129133
chatbot.append((prompt, "[Local Message] 图像生成提示为空白,请在“输入区”输入图像生成提示。"))
130-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新
134+
yield from update_ui(chatbot=chatbot, history=history)
131135
return
132136
chatbot.append(("您正在调用“图像生成”插件。", "[Local Message] 生成图像, 使用前请切换模型到GPT系列。如果中文Prompt效果不理想, 请尝试英文Prompt。正在处理中 ....."))
133137
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 由于请求gpt需要一段时间,我们先及时地做一次界面更新
@@ -150,7 +154,200 @@ def 图片生成_DALLE3(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
150154
f'本地文件地址: <br/>`{image_path}`<br/>'+
151155
f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
152156
])
153-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新
157+
yield from update_ui(chatbot=chatbot, history=history)
158+
159+
160+
161+
def gen_image_banana(chatbot, history, text_prompt, image_base64_list=None, resolution="1K", aspectRatio="1:1", model="nano-banana"):
    """
    Request an image from the configured image-generation endpoint and save it locally.

    This function is a generator: it yields UI refreshes through
    ``update_ui_latest_msg`` and hands its result back as the generator return
    value, so it must be consumed with ``result = yield from gen_image_banana(...)``.

    Args:
        chatbot: Chat UI object, forwarded to ``update_ui_latest_msg``.
        history: Conversation history, forwarded to ``update_ui_latest_msg``.
        text_prompt: Text description for image generation.
        image_base64_list: Optional list of base64-encoded images (for image-to-image).
            Each entry is attached to the request as a ``data:image/jpeg;base64,...`` URL.
            ``None`` (the default) means "no reference images".
        resolution: Sent as ``image_config.image_size``, e.g. "1K", "2K", "4K" (default: "1K").
        aspectRatio: Sent as ``image_config.aspect_ratio``, e.g. "1:1", "16:9", "3:4" (default: "1:1").
        model: Accepted for interface compatibility.
            NOTE(review): the request currently always uses the hard-coded model id
            "google/gemini-3-pro-image-preview"; this argument is not sent. Confirm
            whether that is intended before wiring it through.

    Returns:
        tuple: (image_url, local_file_path), where ``image_url`` is the base64 data URL
        returned by the service and ``local_file_path`` is the file it was decoded into.

    Raises:
        RuntimeError: On any failure (non-200 status, missing image in the response,
            unsupported image URL format, network or decoding error). The original
            exception is chained as ``__cause__``.
    """
    proxies = get_conf('proxies')

    # Resolve endpoint and credentials
    if not get_conf('REROUTE_ALL_TO_ONE_API'):
        api_key = get_conf('GEMINI_API_KEY')
        # Default to a generic endpoint if no base URL is configured
        base_url = get_conf('GEMINI_BASE_URL') or "https://api.example.com"
        # Avoid producing ".../v1/v1/..." when the configured base already ends in /v1
        if base_url.endswith('/v1'):
            base_url = base_url[:-3]
        url = base_url + "/v1/images/generations"
    else:
        url = get_conf('ONE_API_URL')
        api_key = get_conf('ONE_API_KEY')
        # '$API_KEY' is a placeholder meaning "reuse the main API_KEY setting"
        if api_key == '$API_KEY':
            api_key = get_conf('API_KEY')
        # The rerouted endpoint is requested without the configured proxy
        proxies = None

    headers = {
        'Authorization': f'Bearer {api_key}',
        'Content-Type': 'application/json'
    }

    try:
        # Text prompt first, followed by any reference images.
        # `or []` guards the default image_base64_list=None.
        content = [{"type": "text", "text": text_prompt}]
        content.extend(
            {
                "type": "image_url",
                "image_url": {
                    "url": f"data:image/jpeg;base64,{image_base64}"
                }
            }
            for image_base64 in (image_base64_list or [])
        )

        payload = {
            "model": "google/gemini-3-pro-image-preview",
            "messages": [
                {
                    "role": "user",
                    "content": content
                }
            ],
            "modalities": ["image", "text"],
            "image_config": {
                "aspect_ratio": aspectRatio,
                "image_size": resolution
            }
        }

        # (connect, read) timeout: generation can be slow, but a request without any
        # timeout would block this UI generator forever on a dead connection.
        response = requests.post(url, headers=headers, json=payload, proxies=proxies, timeout=(15, 600))

        # Check the status before parsing: error bodies are not guaranteed to be JSON,
        # and a bare `return` here would break the tuple contract callers unpack.
        if response.status_code != 200:
            raise RuntimeError(f"Status Code: {response.status_code}, response: {response.text[:500]}")

        result = response.json()
        image_url = None
        choices = result.get("choices")
        if choices:
            message = choices[0]["message"]
            # When several images are returned, the last one wins
            for image in message.get("images") or []:
                image_url = image["image_url"]["url"]
                logger.info(f"Generated image: {image_url[:50]}...")

        if image_url is None:
            raise RuntimeError("No image URL found in the response.")

        logger.info('Generated image.')
        yield from update_ui_latest_msg(lastmsg="Downloading image", chatbot=chatbot, history=history, delay=0)

        # Only inline base64 data URLs are supported; remote URLs are rejected
        if ';base64,' not in image_url:
            raise ValueError("Invalid image URL format.")

        image_data = base64.b64decode(image_url.split('base64,')[-1])
        file_path = f'{get_log_folder()}/image_gen/'
        os.makedirs(file_path, exist_ok=True)
        file_name = 'Image' + time.strftime("%Y-%m-%d-%H-%M-%S", time.localtime()) + '.png'
        fp = file_path + file_name
        with open(fp, 'wb') as f:
            f.write(image_data)

        return image_url, fp

    except Exception as e:
        yield from update_ui_latest_msg(lastmsg="Generate failed, please try again later.", chatbot=chatbot, history=history, delay=0)
        raise RuntimeError(f"Failed to generate image, please try again later: {str(e)}") from e
280+
281+
282+
283+
284+
285+
286+
287+
288+
289+
@CatchException
def 图片生成_NanoBanana(prompt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
    """
    Plugin entry point: generate an image with NanoBanana and show it in the chat.

    Generator that yields UI refreshes. At most one reference image is sent along
    with the prompt: a recently uploaded image and/or the image remembered in
    ``chatbot._cookies['session_file_storage']`` (the previous result of this plugin).

    Args:
        prompt: Text description of the image; an empty prompt aborts with a notice.
        llm_kwargs: Not used by this function.
        plugin_kwargs: Plugin options. Reads "resolution" (falls back to "1K") and
            "aspect ratio" (falls back to "1:1" when missing or not a supported ratio).
        chatbot: Chat UI object; messages are appended to it and ``_cookies`` is
            used to remember the last generated image.
        history: Ignored on input; reset to an empty list.
        system_prompt: Not used by this function.
        user_request: Not used by this function.
    """
    history = []    # clear history to avoid input overflow

    if prompt.strip() == "":
        chatbot.append((prompt, "[Local Message] 图像生成提示为空白"))
        yield from update_ui(chatbot=chatbot, history=history)
        return

    chatbot.append((
        prompt,
        "正在调用 NanoBanana 图像生成, 正在处理中 ....."
    ))
    # Refresh the UI right away: the generation request takes a while
    yield from update_ui(chatbot=chatbot, history=history)

    if plugin_kwargs.get("advanced_arg") == "":
        plugin_kwargs.pop("advanced_arg")

    model = "nano-banana"
    # Fall back to defaults instead of raising KeyError when an option is absent
    resolution = plugin_kwargs.get("resolution", "1K")
    aspectRatio = plugin_kwargs.get("aspect ratio", "1:1")

    # Validate aspect ratio
    valid_ratios = ("1:1", "16:9", "9:16", "4:3", "3:4", "2:3", "3:2", "4:5", "5:4", "21:9")
    if aspectRatio not in valid_ratios:
        aspectRatio = "1:1"

    try:
        # Image from a recent upload (pop=True is passed through to the helper)
        has_recent_image_upload, image_paths = have_any_recent_upload_image_files(chatbot, pop=True)
        if has_recent_image_upload:
            _, image_base64_array = make_multimodal_input(prompt, image_paths)
        else:
            image_base64_array = []

        # Image remembered from a previous generation in this session
        if 'session_file_storage' in chatbot._cookies:
            try:
                with open(chatbot._cookies['session_file_storage'], 'rb') as image_file:
                    session_image = base64.b64encode(image_file.read()).decode('utf-8')
                image_base64_array += [session_image]
            except Exception:
                # Best effort: a missing or unreadable session image must not abort generation
                logger.exception("Failed to read session_file_storage and parse to image base64.")

        # Only keep the last image, if any.
        # NOTE(review): the session image is appended last, so it takes precedence
        # over a freshly uploaded image — confirm this priority is intended.
        if len(image_base64_array) > 1:
            image_base64_array = [image_base64_array[-1]]

        # Generate image
        _, image_path = yield from gen_image_banana(chatbot, history, prompt, image_base64_list=image_base64_array, resolution=resolution, aspectRatio=aspectRatio, model=model)

        # Build response message
        response_msg = f'模型: {model}<br/>分辨率: {resolution}<br/>比例: {aspectRatio}<br/><br/>'
        response_msg += f'本地文件地址: <br/>`{image_path}`<br/>'
        response_msg += f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'

        # Remember the result so the next call can use it as the reference image
        chatbot._cookies['session_file_storage'] = image_path

        yield from update_ui_latest_msg(lastmsg=response_msg, chatbot=chatbot, history=history, delay=0)

    except Exception as e:
        chatbot.append([prompt, f'生成图像失败: {str(e)}'])

    yield from update_ui(chatbot=chatbot, history=history)
350+
154351

155352

156353
class ImageEditState(GptAcademicState):
@@ -232,7 +429,7 @@ def 图片修改_DALLE2(prompt, llm_kwargs, plugin_kwargs, chatbot, history, sys
232429
f'本地文件地址: <br/>`{image_path}`<br/>'+
233430
f'本地文件预览: <br/><div align="center"><img src="file={image_path}"></div>'
234431
])
235-
yield from update_ui(chatbot=chatbot, history=history) # 刷新界面 界面更新
432+
yield from update_ui(chatbot=chatbot, history=history)
236433
state.unlock_plugin(chatbot)
237434

238435
def make_transparent(input_image_path, output_image_path):

crazy_functions/Image_Generate_Wrap.py

Lines changed: 26 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11

22
from toolbox import get_conf, update_ui
3-
from crazy_functions.Image_Generate import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2
3+
from crazy_functions.Image_Generate import 图片生成_DALLE2, 图片生成_DALLE3, 图片修改_DALLE2, 图片生成_NanoBanana
44
from crazy_functions.plugin_template.plugin_class_template import GptAcademicPluginTemplate, ArgProperty
55

66

@@ -21,33 +21,45 @@ def define_arg_selection_menu(self):
2121
"""
2222
gui_definition = {
2323
"main_input":
24-
ArgProperty(title="输入图片描述", description="需要生成图像的文本描述,尽量使用英文", default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
24+
ArgProperty(title="输入图片描述", description="需要生成图像的文本描述",
25+
default_value="", type="string").model_dump_json(), # 主输入,自动从输入框同步
2526
"model_name":
26-
ArgProperty(title="模型", options=["DALLE2", "DALLE3"], default_value="DALLE3", description="无", type="dropdown").model_dump_json(),
27+
ArgProperty(title="模型", options=["Nano Banana", "DALLE3"],
28+
default_value="Nano Banana", description="无", type="dropdown").model_dump_json(),
2729
"resolution":
28-
ArgProperty(title="分辨率", options=["256x256(限DALLE2)", "512x512(限DALLE2)", "1024x1024", "1792x1024(限DALLE3)", "1024x1792(限DALLE3)"], default_value="1024x1024", description="无", type="dropdown").model_dump_json(),
29-
"quality (仅DALLE3生效)":
30-
ArgProperty(title="质量", options=["standard", "hd"], default_value="standard", description="无", type="dropdown").model_dump_json(),
31-
"style (仅DALLE3生效)":
32-
ArgProperty(title="风格", options=["vivid", "natural"], default_value="vivid", description="无", type="dropdown").model_dump_json(),
33-
30+
ArgProperty(title="分辨率", options=["1K", "2K"],
31+
default_value="1K", description="无", type="dropdown").model_dump_json(),
32+
"aspect ratio":
33+
ArgProperty(title="横纵比例", options=["1:1", "16:9", "3:4"],
34+
default_value="16:9", description="无", type="dropdown").model_dump_json(),
35+
"quality":
36+
ArgProperty(title="质量 (仅DALLE3生效)", options=["standard", "hd"],
37+
default_value="standard", description="无", type="dropdown").model_dump_json(),
38+
"style":
39+
ArgProperty(title="风格 (仅DALLE3生效)", options=["vivid", "natural"],
40+
default_value="vivid", description="无", type="dropdown").model_dump_json(),
3441
}
3542
return gui_definition
3643

44+
3745
def execute(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request):
3846
"""
3947
执行插件
4048
"""
41-
# 分辨率
49+
4250
resolution = plugin_kwargs["resolution"].replace("(限DALLE2)", "").replace("(限DALLE3)", "")
4351

44-
if plugin_kwargs["model_name"] == "DALLE2":
45-
plugin_kwargs["advanced_arg"] = resolution
52+
if plugin_kwargs["model_name"] == "Nano Banana":
53+
yield from 图片生成_NanoBanana(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
54+
55+
elif plugin_kwargs["model_name"] == "DALLE2":
56+
plugin_kwargs["advanced_arg"] = "1024x1024"
4657
yield from 图片生成_DALLE2(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
4758

4859
elif plugin_kwargs["model_name"] == "DALLE3":
49-
quality = plugin_kwargs["quality (仅DALLE3生效)"]
50-
style = plugin_kwargs["style (仅DALLE3生效)"]
60+
resolution = "1792x1024" if resolution == "2K" else "1024x1024"
61+
quality = plugin_kwargs["quality"]
62+
style = plugin_kwargs["style"]
5163
plugin_kwargs["advanced_arg"] = f"{resolution}-{quality}-{style}"
5264
yield from 图片生成_DALLE3(txt, llm_kwargs, plugin_kwargs, chatbot, history, system_prompt, user_request)
5365

0 commit comments

Comments
 (0)