Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 37 additions & 17 deletions genie-backend/src/main/java/com/jd/genie/agent/agent/BaseAgent.java
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import lombok.Data;
import lombok.experimental.Accessors;
import lombok.extern.slf4j.Slf4j;
import org.apache.commons.lang3.StringUtils;

import java.util.ArrayList;
import java.util.List;
Expand Down Expand Up @@ -92,26 +93,45 @@ public String run(String query) {
* 更新代理记忆
*/
public void updateMemory(RoleType role, String content, String base64Image, Object... args) {
    // Base64-only entry point: forwards to updateMemoryWithImage with no image URL (null).
    // The message itself is built and stored by updateMemoryWithImage, so nothing is
    // constructed here; building it locally as well would only create an object that is
    // thrown away.
    updateMemoryWithImage(role, content, base64Image, null, args);
}

/**
 * Appends a message to the agent's memory; the image may be supplied as base64 data or as a URL.
 *
 * <p>USER, SYSTEM and ASSISTANT messages are built by {@code createImageMessage}, which uses
 * {@code imageUrl} when it is non-empty and falls back to {@code base64Image} otherwise.
 * TOOL messages are built with {@code Message.toolMessage} and only receive
 * {@code base64Image}; {@code imageUrl} is not used for them.
 *
 * @param role        role of the message to record
 * @param content     text content of the message
 * @param base64Image base64 image data; may be {@code null}
 * @param imageUrl    image URL; may be {@code null} or empty
 * @param args        extra arguments; for TOOL the first element is cast to {@code String}
 *                    and passed to {@code Message.toolMessage} as the tool call id
 * @throws IllegalArgumentException if {@code role} is TOOL and no extra argument was supplied,
 *                                  or if {@code createImageMessage} rejects the role
 */
public void updateMemoryWithImage(RoleType role, String content, String base64Image, String imageUrl, Object... args) {
    Message message = switch (role) {
        // The three non-tool roles share one construction path; the role is passed through as-is.
        case USER, SYSTEM, ASSISTANT -> createImageMessage(role, content, base64Image, imageUrl);
        case TOOL -> {
            // Fail with a descriptive error instead of a bare ArrayIndexOutOfBoundsException
            // when the caller forgot to pass the tool call id.
            if (args == null || args.length == 0) {
                throw new IllegalArgumentException(
                        "TOOL message requires the tool call id as the first extra argument");
            }
            yield Message.toolMessage(content, (String) args[0], base64Image);
        }
    };
    memory.addMessage(message);
}

/**
 * Builds a message for the given role, attaching the image either by URL or as base64 data.
 *
 * <p>A non-empty {@code imageUrl} takes precedence over {@code base64Image}.
 *
 * @param role        role of the message; USER and ASSISTANT accept both image forms,
 *                    SYSTEM accepts base64 only
 * @param content     text content of the message
 * @param base64Image base64 image data, used when {@code imageUrl} is null or empty
 * @param imageUrl    image URL, used when non-empty
 * @return the constructed message
 * @throws IllegalArgumentException if the role is not handled for the selected image form
 */
private Message createImageMessage(RoleType role, String content, String base64Image, String imageUrl) {
    final boolean byUrl = StringUtils.isNotEmpty(imageUrl);
    switch (role) {
        case USER:
            return byUrl
                    ? Message.userMessageWithImageUrl(content, imageUrl)
                    : Message.userMessage(content, base64Image);
        case ASSISTANT:
            return byUrl
                    ? Message.assistantMessageWithImageUrl(content, imageUrl)
                    : Message.assistantMessage(content, base64Image);
        case SYSTEM:
            // NOTE: the SYSTEM role is normally used to constrain agent behaviour; putting an
            // image there is not good practice, so only the base64 form is supported.
            if (!byUrl) {
                return Message.systemMessage(content, base64Image);
            }
            break;
        default:
            break;
    }
    throw new IllegalArgumentException("Unsupported role type: " + role);
}

public String executeTool(ToolCall command) {
if (command == null || command.getFunction() == null || command.getFunction().getName() == null) {
return "Error: Invalid function call format";
Expand Down
38 changes: 38 additions & 0 deletions genie-backend/src/main/java/com/jd/genie/agent/dto/Message.java
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ public class Message {
private RoleType role; // message role
private String content; // message content
private String base64Image; // image data (base64-encoded)
private String imageUrl; // image URL - supports remote images
private String toolCallId; // tool call ID
private List<ToolCall> toolCalls; // list of tool calls

Expand Down Expand Up @@ -79,4 +80,41 @@ public static Message fromToolCalls(String content, List<ToolCall> toolCalls) {
.toolCalls(toolCalls)
.build();
}

/**
 * Creates a USER-role message whose image is referenced by URL.
 *
 * @param content  text content of the message
 * @param imageUrl URL of the image to attach
 * @return a message with role USER carrying the given content and image URL
 */
public static Message userMessageWithImageUrl(String content, String imageUrl) {
    final Message userMessage = Message.builder()
            .imageUrl(imageUrl)
            .content(content)
            .role(RoleType.USER)
            .build();
    return userMessage;
}

/**
 * Creates an ASSISTANT-role message whose image is referenced by URL.
 *
 * @param content  text content of the message
 * @param imageUrl URL of the image to attach
 * @return a message with role ASSISTANT carrying the given content and image URL
 */
public static Message assistantMessageWithImageUrl(String content, String imageUrl) {
    final Message assistantMessage = Message.builder()
            .imageUrl(imageUrl)
            .content(content)
            .role(RoleType.ASSISTANT)
            .build();
    return assistantMessage;
}

/**
 * Tells whether this message carries an image in either form.
 *
 * @return {@code true} if {@code imageUrl} or {@code base64Image} is non-null and non-empty
 */
public boolean hasImage() {
    if (imageUrl != null && !imageUrl.isEmpty()) {
        return true;
    }
    return base64Image != null && !base64Image.isEmpty();
}

/**
 * Tells whether this message's image is supplied as a URL.
 *
 * @return {@code true} if {@code imageUrl} is non-null and non-empty
 */
public boolean isImageUrl() {
    return !(imageUrl == null || imageUrl.isEmpty());
}
}
39 changes: 24 additions & 15 deletions genie-backend/src/main/java/com/jd/genie/agent/llm/LLM.java
Original file line number Diff line number Diff line change
Expand Up @@ -87,23 +87,32 @@ public static List<Map<String, Object>> formatMessages(List<Message> messages, b

for (Message message : messages) {
Map<String, Object> messageMap = new HashMap<>();
// 处理 base64 图像
if (message.getBase64Image() != null && !message.getBase64Image().isEmpty()) {
// 处理图片(支持 imageUrl 和 base64Image)
if (message.hasImage()) {
List<Map<String, Object>> multimodalContent = new ArrayList<>();
// 创建内层的 image_url Map

// 1. 添加文本内容
if (message.getContent() != null && !message.getContent().isEmpty()) {
Map<String, Object> textMap = new HashMap<>();
textMap.put("type", "text");
textMap.put("text", message.getContent());
multimodalContent.add(textMap);
}

// 2. 添加图片内容
Map<String, Object> imageMap = new HashMap<>();
imageMap.put("type", "image_url");

Map<String, String> imageUrlMap = new HashMap<>();
imageUrlMap.put("url", "data:image/jpeg;base64," + message.getBase64Image());
// 创建外层的 Map
Map<String, Object> outerMap = new HashMap<>();
outerMap.put("type", "image_url");
outerMap.put("image_url", imageUrlMap);
// 将创建好的 Map 添加到 multimodalContent 中
multimodalContent.add(outerMap);

Map<String, Object> contentMap = new HashMap<>();
outerMap.put("type", "text");
outerMap.put("text", message.getContent());
multimodalContent.add(contentMap);
if (message.isImageUrl()) {
// 使用图片 URL
imageUrlMap.put("url", message.getImageUrl());
} else {
// 使用 base64 编码图片
imageUrlMap.put("url", "data:image/jpeg;base64," + message.getBase64Image());
}
imageMap.put("image_url", imageUrlMap);
multimodalContent.add(imageMap);

messageMap.put("role", message.getRole().getValue());
messageMap.put("content", multimodalContent);
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package com.jd.genie.agent.dto;

import com.jd.genie.agent.enums.RoleType;
import org.junit.jupiter.api.Test;

import static org.junit.jupiter.api.Assertions.*;

/**
 * Tests for image-URL support on {@link Message}: the URL-based factory methods, the
 * {@code hasImage()} / {@code isImageUrl()} predicates, and the existing base64 factory.
 */
class MessageImageUrlTest {

    private static final String IMG_URL = "https://example.com/img.jpg";
    private static final String RESULT_URL = "https://example.com/result.jpg";
    private static final String BASE64_SAMPLE = "base64String";

    /** Builds a USER message through the builder with content "test" and the given image URL. */
    private static Message userMessageBuiltWithUrl(String url) {
        return Message.builder()
                .role(RoleType.USER)
                .content("test")
                .imageUrl(url)
                .build();
    }

    @Test
    void testUserMessageWithImageUrl() {
        // A USER message created from a URL keeps role, content and URL; base64 stays unset.
        Message created = Message.userMessageWithImageUrl("请分析这张图片", RESULT_URL);

        assertNotNull(created, "Message should not be null");
        assertEquals(RoleType.USER, created.getRole(), "Role should be USER");
        assertEquals("请分析这张图片", created.getContent(), "Content should match");
        assertEquals(RESULT_URL, created.getImageUrl(), "ImageUrl should match");
        assertNull(created.getBase64Image(), "Base64Image should be null");
    }

    @Test
    void testAssistantMessageWithImageUrl() {
        // An ASSISTANT message created from a URL keeps role and URL.
        Message created = Message.assistantMessageWithImageUrl("这是分析结果", RESULT_URL);

        assertEquals(RoleType.ASSISTANT, created.getRole());
        assertEquals(RESULT_URL, created.getImageUrl());
    }

    @Test
    void testHasImage_withImageUrl() {
        // hasImage() with a URL image.
        assertTrue(Message.userMessageWithImageUrl("test", IMG_URL).hasImage(), "Should have image");
    }

    @Test
    void testHasImage_withBase64() {
        // hasImage() with a base64 image.
        assertTrue(Message.userMessage("test", BASE64_SAMPLE).hasImage(), "Should have image");
    }

    @Test
    void testHasImage_withoutImage() {
        // hasImage() with no image at all.
        assertFalse(Message.userMessage("test", null).hasImage(), "Should not have image");
    }

    @Test
    void testIsImageUrl_true() {
        // isImageUrl() for a URL-based message.
        assertTrue(Message.userMessageWithImageUrl("test", IMG_URL).isImageUrl(), "Should be image URL");
    }

    @Test
    void testIsImageUrl_false() {
        // isImageUrl() for a base64-based message.
        assertFalse(Message.userMessage("test", BASE64_SAMPLE).isImageUrl(), "Should not be image URL");
    }

    @Test
    void testBackwardCompatibility_base64() {
        // Backward compatibility: the base64 factory still works and leaves the URL unset.
        final String text = "旧代码";
        final String encoded = "iVBORw0KGgoAAAANS...";

        Message legacy = Message.userMessage(text, encoded);

        assertEquals(RoleType.USER, legacy.getRole());
        assertEquals(text, legacy.getContent());
        assertEquals(encoded, legacy.getBase64Image());
        assertNull(legacy.getImageUrl(), "ImageUrl should be null for old API");
        assertTrue(legacy.hasImage(), "Should have image");
        assertFalse(legacy.isImageUrl(), "Should not be URL type");
    }

    @Test
    void testBothImageTypes() {
        // Both image forms set at once (only possible through the builder).
        Message both = Message.builder()
                .role(RoleType.USER)
                .content("测试")
                .base64Image(BASE64_SAMPLE)
                .imageUrl(IMG_URL)
                .build();

        assertTrue(both.hasImage(), "Should have image");
        assertTrue(both.isImageUrl(), "Should be URL type (URL takes precedence)");
        assertEquals(IMG_URL, both.getImageUrl());
        assertEquals(BASE64_SAMPLE, both.getBase64Image());
    }

    @Test
    void testEmptyImageUrl() {
        // An empty-string URL does not count as an image.
        Message withEmptyUrl = userMessageBuiltWithUrl("");

        assertFalse(withEmptyUrl.hasImage(), "Empty URL should not count as image");
        assertFalse(withEmptyUrl.isImageUrl(), "Empty URL should return false");
    }

    @Test
    void testNullImageUrl() {
        // A null URL does not count as an image.
        Message withNullUrl = userMessageBuiltWithUrl(null);

        assertFalse(withNullUrl.hasImage(), "Null URL should not count as image");
        assertFalse(withNullUrl.isImageUrl(), "Null URL should return false");
    }
}
Loading