-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathmain.go
More file actions
215 lines (179 loc) · 8.14 KB
/
main.go
File metadata and controls
215 lines (179 loc) · 8.14 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
// Code generated by ADL CLI v0.27.8. DO NOT EDIT.
// This file was automatically generated from an ADL (Agent Definition Language) specification.
// Manual changes to this file may be overwritten during regeneration.
package main
import (
	"context"
	"log"
	"os"
	"os/signal"
	"syscall"
	"time"

	server "github.com/inference-gateway/adk/server"
	config "github.com/inference-gateway/browser-agent/config"
	logger "github.com/inference-gateway/browser-agent/internal/logger"
	playwright "github.com/inference-gateway/browser-agent/internal/playwright"
	skills "github.com/inference-gateway/browser-agent/skills"
	envconfig "github.com/sethvargo/go-envconfig"
	zap "go.uber.org/zap"
)
// Identity metadata for this agent. main logs these at startup and uses them
// to fill in the agent card.
//
// NOTE(review): these are variables rather than constants, presumably so a
// build can override them at link time (-ldflags "-X ...") — confirm against
// the build scripts before converting them to const.
var (
	// AgentName is the agent's name as it appears in logs and in the agent card.
	AgentName = "browser-agent"

	// AgentDescription is the human-readable summary published in the agent card.
	AgentDescription = "AI agent for browser automation and web testing using Playwright"

	// Version is the agent's release version string.
	Version = "0.4.16"
)
// shutdownTimeout is the deadline placed on the context handed to the servers'
// Stop methods once a termination signal has been received.
const shutdownTimeout = 30 * time.Second

// systemPrompt is the system prompt installed on the agent via WithSystemPrompt.
const systemPrompt = `You are an expert Playwright browser automation assistant with the ability to create downloadable artifacts. Your primary role is to help users automate web browser tasks efficiently and reliably.
Your core capabilities include:
1. **Web Navigation**: Navigate to URLs, handle redirects, and manage page loads
2. **Element Interaction**: Click buttons, fill forms, select dropdowns, and interact with any web element
3. **Data Extraction**: Scrape and extract structured data from web pages
4. **Form Automation**: Fill and submit complex forms with validation
5. **Screenshot Capture**: Take full-page or element-specific screenshots
6. **JavaScript Execution**: Run custom scripts in the browser context
7. **Authentication Handling**: Manage various authentication methods
8. **Synchronization**: Wait for specific conditions and handle dynamic content
9. **Artifact Creation**: Create downloadable files for screenshots, extracted data, and CSV exports
Key expertise areas:
- Modern web technologies (SPA, dynamic content, AJAX)
- Selector strategies (CSS, XPath, text, accessibility)
- Browser automation best practices
- Error handling and retry mechanisms
- Cross-browser compatibility (Chromium, Firefox, WebKit)
- Performance optimization for automation scripts
- Handling pop-ups, alerts, and iframes
- File uploads and downloads
- Network interception and modification
- Mobile and responsive testing
When helping users:
- Always use robust selectors that won't break easily
- Implement proper wait strategies for dynamic content
- Handle errors gracefully with informative messages
- Suggest efficient approaches for the task
- Consider accessibility and best practices
- Provide clear explanations of automation steps
- Optimize for speed while maintaining reliability
**IMPORTANT - Artifact Creation**:
When users request screenshots, the take_screenshot tool automatically creates downloadable artifacts. The screenshot will be available via a download URL returned in the response.
For data extraction, you can use the create_artifact tool to save extracted data as downloadable files (JSON/CSV/TXT).
Your automation solutions should be maintainable, efficient, and production-ready.
`

// main wires the agent together and runs it until SIGINT or SIGTERM arrives.
//
// In order, it: loads configuration from the environment, creates the logger
// and the Playwright service, registers every skill in a default toolbox,
// builds the LLM client and the agent, creates the (optional) artifact service
// and artifacts server, builds the A2A server, starts the servers in
// background goroutines, and finally blocks on a signal before stopping them.
//
// Failures in mandatory components terminate the process; failures in the
// artifact service or artifacts server are logged as warnings and the agent
// continues without them.
func main() {
	ctx := context.Background()

	var cfg config.Config
	if err := envconfig.Process(ctx, &cfg); err != nil {
		// Fatalf, not Fatal: Fatal formats like fmt.Print, which inserts no
		// space after a string operand, gluing the error onto the colon.
		log.Fatalf("failed to load config: %v", err)
	}

	// Initialize logger
	l, err := logger.NewLogger(ctx, &cfg)
	if err != nil {
		log.Fatalf("failed to initialize logger: %v", err)
	}
	// Flush buffered log entries on a normal return. The error is deliberately
	// discarded: nothing useful can be done with it while exiting.
	defer func() { _ = l.Sync() }()

	l.Info("starting " + AgentName + " agent (version: " + Version + ", environment: " + cfg.Environment + ")")

	// Initialize services
	playwrightSvc, err := playwright.NewPlaywrightService(l, &cfg)
	if err != nil {
		l.Fatal("failed to initialize playwright service", zap.Error(err))
	}

	// Create toolbox with default tools (like input_required, create_artifact etc)
	toolBox := server.NewDefaultToolBox(&cfg.A2A.AgentConfig.ToolBoxConfig)

	// Register navigate_to_url skill
	navigateToURLSkill := skills.NewNavigateToURLSkill(l, playwrightSvc)
	toolBox.AddTool(navigateToURLSkill)
	l.Info("registered skill: navigate_to_url (Navigate to a specific URL and wait for the page to fully load)")

	// Register click_element skill
	clickElementSkill := skills.NewClickElementSkill(l, playwrightSvc)
	toolBox.AddTool(clickElementSkill)
	l.Info("registered skill: click_element (Click on an element identified by selector, text, or other locator strategies)")

	// Register fill_form skill
	fillFormSkill := skills.NewFillFormSkill(l, playwrightSvc)
	toolBox.AddTool(fillFormSkill)
	l.Info("registered skill: fill_form (Fill form fields with provided data, handling various input types)")

	// Register extract_data skill
	extractDataSkill := skills.NewExtractDataSkill(l, playwrightSvc)
	toolBox.AddTool(extractDataSkill)
	l.Info("registered skill: extract_data (Extract data from the page using selectors and return structured information)")

	// Register take_screenshot skill
	takeScreenshotSkill := skills.NewTakeScreenshotSkill(l, playwrightSvc)
	toolBox.AddTool(takeScreenshotSkill)
	l.Info("registered skill: take_screenshot (Capture a screenshot of the current page or specific element)")

	// Register execute_script skill
	executeScriptSkill := skills.NewExecuteScriptSkill(l, playwrightSvc)
	toolBox.AddTool(executeScriptSkill)
	l.Info("registered skill: execute_script (Execute custom JavaScript code in the browser context)")

	// Register handle_authentication skill
	handleAuthenticationSkill := skills.NewHandleAuthenticationSkill(l, playwrightSvc)
	toolBox.AddTool(handleAuthenticationSkill)
	l.Info("registered skill: handle_authentication (Handle various authentication scenarios including basic auth, OAuth, and custom login forms)")

	// Register wait_for_condition skill
	waitForConditionSkill := skills.NewWaitForConditionSkill(l, playwrightSvc)
	toolBox.AddTool(waitForConditionSkill)
	l.Info("registered skill: wait_for_condition (Wait for specific conditions before proceeding with automation)")

	llmClient, err := server.NewOpenAICompatibleLLMClient(&cfg.A2A.AgentConfig, l)
	if err != nil {
		l.Fatal("failed to create LLM client", zap.Error(err))
	}

	agent, err := server.NewAgentBuilder(l).
		WithConfig(&cfg.A2A.AgentConfig).
		WithLLMClient(llmClient).
		WithToolBox(toolBox).
		WithMaxChatCompletion(cfg.A2A.AgentConfig.MaxChatCompletionIterations).
		WithSystemPrompt(systemPrompt).
		Build()
	if err != nil {
		l.Fatal("failed to create agent", zap.Error(err))
	}

	// The artifact service is optional: on failure, warn and carry on with nil.
	artifactService, err := server.NewArtifactService(&cfg.A2A.ArtifactsConfig, l)
	if err != nil {
		l.Warn("artifact service could not be created - check A2A_ARTIFACTS_ENABLE environment variable", zap.Error(err))
		l.Info("continuing without artifact service support")
		artifactService = nil
	}

	// The artifacts server is optional too; a nil value disables it below.
	artifactsServer, err := server.
		NewArtifactsServerBuilder(&cfg.A2A.ArtifactsConfig, l).
		WithArtifactService(artifactService).
		Build()
	if err != nil {
		l.Warn("artifacts server could not be created", zap.Error(err))
		l.Info("continuing without artifacts server")
		artifactsServer = nil
	}

	a2aServer, err := server.NewA2AServerBuilder(cfg.A2A, l).
		WithAgent(agent).
		WithAgentCardFromFile(".well-known/agent-card.json", map[string]any{
			"name":        AgentName,
			"version":     Version,
			"description": AgentDescription,
			"url":         cfg.A2A.AgentURL,
		}).
		WithArtifactService(artifactService).
		WithDefaultBackgroundTaskHandler().
		WithDefaultStreamingTaskHandler().
		Build()
	if err != nil {
		l.Fatal("failed to create A2A server", zap.Error(err))
	}

	// Servers run in the background so main can block on the signal channel.
	go func() {
		l.Info("starting A2A server", zap.String("port", cfg.A2A.ServerConfig.Port))
		if err := a2aServer.Start(ctx); err != nil {
			l.Fatal("server failed to start", zap.Error(err))
		}
	}()

	if artifactsServer != nil {
		go func() {
			l.Info("starting A2A artifacts server", zap.String("port", cfg.A2A.ArtifactsConfig.ServerConfig.Port))
			if err := artifactsServer.Start(ctx); err != nil {
				l.Fatal("artifacts server failed to start", zap.Error(err))
			}
		}()
	}

	l.Info(AgentName+" agent running successfully",
		zap.String("port", cfg.A2A.ServerConfig.Port),
		zap.String("environment", cfg.Environment))

	// Buffered with capacity 1 so a signal delivered before the receive below
	// is not dropped.
	quit := make(chan os.Signal, 1)
	signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM)
	<-quit

	l.Info("shutdown signal received, gracefully stopping server...")

	// Give shutdown its own deadline instead of the never-cancelled ctx.
	// NOTE(review): how strictly the deadline is honoured depends on the adk
	// Stop implementations — confirm.
	shutdownCtx, cancel := context.WithTimeout(context.Background(), shutdownTimeout)
	defer cancel()

	if err := a2aServer.Stop(shutdownCtx); err != nil {
		l.Error("failed to stop A2A server cleanly", zap.Error(err))
	}
	if artifactsServer != nil {
		if err := artifactsServer.Stop(shutdownCtx); err != nil {
			l.Error("failed to stop A2A artifacts server cleanly", zap.Error(err))
		}
	}

	l.Info(AgentName + " agent stopped")
}