Skip to content

Commit 0b7fbd3

Browse files
committed
docs: add godoc for public api and readme response examples
1 parent a9986fd commit 0b7fbd3

16 files changed

Lines changed: 339 additions & 95 deletions

File tree

README.md

Lines changed: 82 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ API response example:
8282
"rank": 2,
8383
"url": "https://www.bing.com/ck/a?!&&p=6f15ac4589858d0a104cd6f55cc8",
8484
"title": "Golden Retriever Dog Forums",
85-
"description": "Oct 20, 2024 · Back in the 1970s, Golden Retrievers routinely lived until 16 and 17 years old, they are now...",
85+
"description": "Oct 20, 2024 · Back in the 1970s, Golden Retrievers routinely lived until 16 and 17 years old, they are now...",
8686
"ad": false,
8787
"engine": "bing"
8888
},
@@ -142,6 +142,87 @@ curl "http://127.0.0.1:7000/yandex/search?text=golang&limit=10&start=10"
142142
curl "http://127.0.0.1:7000/bing/image?text=golang&limit=20"
143143
```
144144

145+
## Response Examples
146+
147+
Interactive docs (OpenAPI + Swagger UI) are available at:
148+
149+
- `http://127.0.0.1:7000/docs`
150+
- `http://127.0.0.1:7000/openapi.yaml`
151+
152+
### Web Search Response (`/<engine>/search`)
153+
154+
```json
155+
[
156+
{
157+
"rank": 1,
158+
"url": "https://go.dev/doc/",
159+
"title": "Documentation - The Go Programming Language",
160+
"description": "Official Go documentation, tutorials, references, and release notes.",
161+
"ad": false
162+
},
163+
{
164+
"rank": 2,
165+
"url": "https://pkg.go.dev/",
166+
"title": "pkg.go.dev",
167+
"description": "Go package discovery and API documentation.",
168+
"ad": false
169+
}
170+
]
171+
```
172+
173+
### Image Search Response (`/<engine>/image`)
174+
175+
```json
176+
[
177+
{
178+
"rank": 1,
179+
"url": "https://golang.org/lib/godoc/images/go-logo-blue.svg",
180+
"title": "Go Gopher Logo",
181+
"description": "Source: https://go.dev/brand/",
182+
"ad": false
183+
},
184+
{
185+
"rank": 2,
186+
"url": "https://example.com/images/go-mascot.png",
187+
"title": "Go mascot",
188+
"description": "Height:800, Width:1200, Source Page: https://example.com/post",
189+
"ad": false
190+
}
191+
]
192+
```
193+
194+
### Error Responses
195+
196+
`400 Bad Request` (invalid/missing query):
197+
198+
```json
199+
{
200+
"error": "bad_request",
201+
"code": 400,
202+
"message": "Query cannot be empty"
203+
}
204+
```
205+
206+
`503 Service Unavailable` (engine unavailable, captcha, timeout, or proxy path failure):
207+
208+
```json
209+
{
210+
"error": "service_unavailable",
211+
"code": 503,
212+
"message": "captcha found, please stop sending requests for a while: captcha detected"
213+
}
214+
```
215+
216+
### Response Headers
217+
218+
| Header | Values/Examples | Meaning |
219+
| ------------------- | ------------------------------- | ------------------------------------------------------------- |
220+
| `X-Cache` | `HIT`, `MISS`, `BYPASS` | Cache result for this response. |
221+
| `X-Fallback-Engine` | `google`, `bing`, `duckduckgo`  | Present when a dedicated endpoint used a fallback engine.     |
222+
| `X-Proxy-Mode` | `off`, `single`, `pool` | Proxy policy mode applied by resilient search. |
223+
| `X-Proxy-Tag`       | `residential`, `datacenter`, (empty) | Selected proxy pool tag. Empty when proxy mode is off/direct. |
224+
| `X-Proxy-Used`      | `direct`, `socks5://host:port`  | Actual upstream route used to execute the request.            |
225+
145226
## 🌍 Proxy Support
146227

147228
OpenSERP supports HTTP and SOCKS5 proxies.

baidu/search.go

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,12 +31,14 @@ type imageDataJson struct {
3131
}
3232
}
3333

34+
// Baidu implements core.SearchEngine for Baidu SERP pages.
3435
type Baidu struct {
3536
core.Browser
3637
core.SearchEngineOptions
3738
logger *core.EngineLogger
3839
}
3940

41+
// New creates a Baidu engine instance with browser/runtime options applied.
4042
func New(browser core.Browser, opts core.SearchEngineOptions) *Baidu {
4143
baid := Baidu{Browser: browser}
4244
opts.Init()
@@ -45,10 +47,12 @@ func New(browser core.Browser, opts core.SearchEngineOptions) *Baidu {
4547
return &baid
4648
}
4749

50+
// Name returns the stable engine identifier.
4851
func (baid *Baidu) Name() string {
4952
return "baidu"
5053
}
5154

55+
// GetRateLimiter returns a limiter configured from SearchEngineOptions.
5256
func (baid *Baidu) GetRateLimiter() *rate.Limiter {
5357
ratelimit := rate.Every(baid.GetRatelimit())
5458
return rate.NewLimiter(ratelimit, baid.RateBurst)
@@ -64,6 +68,8 @@ func (baid *Baidu) isTimeout(page *rod.Page) bool {
6468
return err == nil
6569
}
6670

71+
// Search executes a Baidu web search and returns normalized search results.
72+
// It may return core.ErrCaptcha or core.ErrSearchTimeout.
6773
func (baid *Baidu) Search(query core.Query) ([]core.SearchResult, error) {
6874
baid.logger.Debug("Starting search, query: %+v", query)
6975

@@ -145,6 +151,8 @@ func (baid *Baidu) Search(query core.Query) ([]core.SearchResult, error) {
145151
return core.DeduplicateResults(searchResults), nil
146152
}
147153

154+
// SearchImage executes a Baidu image search and returns normalized image
155+
// results. It may return core.ErrCaptcha or core.ErrSearchTimeout.
148156
func (baid *Baidu) SearchImage(query core.Query) ([]core.SearchResult, error) {
149157
baid.logger.Debug("Starting image search, query: %+v", query)
150158

baidu/url.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,6 +22,8 @@ func dateToTimestamp(date string) (int64, error) {
2222
return t.Unix(), nil
2323
}
2424

25+
// BuildURL builds a Baidu web search URL from Query fields.
26+
// It returns an error when query text, date, or pagination parameters are invalid.
2527
func BuildURL(q core.Query) (string, error) {
2628
base, _ := url.Parse("https://www.baidu.com/")
2729
base.Path += "s"
@@ -84,6 +86,8 @@ func BuildURL(q core.Query) (string, error) {
8486
return base.String(), nil
8587
}
8688

89+
// BuildImageURL builds a Baidu image search URL from Query fields and page
90+
// index. It returns an error when the query text is empty.
8791
func BuildImageURL(q core.Query, pageNum int) (string, error) {
8892
base, _ := url.Parse("https://image.baidu.com/")
8993
base.Path += "search/acjson"

bing/search.go

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,14 @@ import (
1313
"golang.org/x/time/rate"
1414
)
1515

16+
// Bing implements core.SearchEngine for Bing SERP pages.
1617
type Bing struct {
1718
core.Browser
1819
core.SearchEngineOptions
1920
logger *core.EngineLogger
2021
}
2122

23+
// New creates a Bing engine instance with browser/runtime options applied.
2224
func New(browser core.Browser, opts core.SearchEngineOptions) *Bing {
2325
bing := Bing{Browser: browser}
2426
opts.Init()
@@ -27,10 +29,12 @@ func New(browser core.Browser, opts core.SearchEngineOptions) *Bing {
2729
return &bing
2830
}
2931

32+
// Name returns the stable engine identifier.
3033
func (bing *Bing) Name() string {
3134
return "bing"
3235
}
3336

37+
// GetRateLimiter returns a limiter configured from SearchEngineOptions.
3438
func (bing *Bing) GetRateLimiter() *rate.Limiter {
3539
ratelimit := rate.Every(bing.GetRatelimit())
3640
return rate.NewLimiter(ratelimit, bing.RateBurst)
@@ -99,6 +103,8 @@ func (bing *Bing) close(page *rod.Page) {
99103
}
100104
}
101105

106+
// Search executes a Bing web search and returns normalized search results.
107+
// It may return core.ErrCaptcha or core.ErrSearchTimeout.
102108
func (bing *Bing) Search(query core.Query) ([]core.SearchResult, error) {
103109
bing.logger.Debug("Starting search, query: %+v", query)
104110

@@ -240,7 +246,7 @@ func (bing *Bing) Search(query core.Query) ([]core.SearchResult, error) {
240246
return deduped, nil
241247
}
242248

243-
// BingImageData represents the JSON structure in the m attribute of image elements
249+
// BingImageData represents metadata encoded in the image result `m` attribute.
244250
type BingImageData struct {
245251
T string `json:"t"` // Title
246252
Desc string `json:"desc"` // Description
@@ -252,7 +258,8 @@ type BingImageData struct {
252258
MURL string `json:"murl"` // Image URL
253259
}
254260

255-
// SearchImage performs Bing image search and returns results
261+
// SearchImage executes a Bing image search and returns normalized image
262+
// results. It may return core.ErrCaptcha or core.ErrSearchTimeout.
256263
func (bing *Bing) SearchImage(query core.Query) ([]core.SearchResult, error) {
257264
bing.logger.Debug("Starting image search, query: %+v", query)
258265

bing/url.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@ import (
1212
"github.com/sirupsen/logrus"
1313
)
1414

15+
// BuildURL builds a Bing web search URL from Query fields.
16+
// It returns an error when query text or date parameters are invalid.
1517
func BuildURL(q core.Query) (string, error) {
1618
base, err := url.Parse("https://www.bing.com")
1719
if err != nil {
@@ -93,6 +95,8 @@ func BuildURL(q core.Query) (string, error) {
9395
return base.String(), nil
9496
}
9597

98+
// BuildImageURL builds a Bing image search URL from Query fields.
99+
// It returns an error when the resulting query text is empty.
96100
func BuildImageURL(q core.Query) (string, error) {
97101
base, err := url.Parse("https://www.bing.com")
98102
if err != nil {

core/browser.go

Lines changed: 34 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -17,23 +17,35 @@ import (
1717
"github.com/sirupsen/logrus"
1818
)
1919

20+
// BrowserOpts configures Chromium launch and navigation behavior.
2021
type BrowserOpts struct {
21-
IsHeadless bool // Use browser interface
22-
IsLeakless bool // Force to kill browser
23-
Timeout time.Duration // Timeout
24-
LanguageCode string
25-
WaitRequests bool // Wait requests to complete after navigation
26-
LeavePageOpen bool // Leave pages and browser open
27-
WaitLoadTime time.Duration // Time to wait till page loads
28-
CaptchaSolverApiKey string // 2Captcha api key
29-
BrowserPath string // Explicit browser executable path
30-
ProxyURL string // Proxy URL
31-
Insecure bool // Allow insecure TLS connections
32-
UseStealth bool // Use go-rod stealth plugin
33-
22+
// IsHeadless runs Chromium without visible UI.
23+
IsHeadless bool
24+
// IsLeakless forces child browser process cleanup when the parent exits.
25+
IsLeakless bool
26+
// Timeout is applied to browser connect and page navigation operations.
27+
Timeout time.Duration
28+
// LanguageCode sets Accept-Language for emulated requests.
29+
LanguageCode string
30+
// WaitRequests waits for request-idle state after navigation.
31+
WaitRequests bool
32+
// LeavePageOpen keeps pages open after search operations.
33+
LeavePageOpen bool
34+
// WaitLoadTime is an additional fixed wait after load/idle checks.
35+
WaitLoadTime time.Duration
36+
// CaptchaSolverApiKey enables 2Captcha integration for supported engines.
37+
CaptchaSolverApiKey string
38+
// BrowserPath optionally points to a specific browser executable.
39+
BrowserPath string
40+
// ProxyURL defines the upstream proxy for browser traffic.
41+
ProxyURL string
42+
// Insecure allows invalid TLS certificates for browser requests.
43+
Insecure bool
44+
// UseStealth enables go-rod stealth page creation.
45+
UseStealth bool
3446
}
3547

36-
// Initialize browser parameters with default values if they are not set
48+
// Check applies default option values when optional fields are unset.
3749
func (o *BrowserOpts) Check() {
3850
if o.Timeout == 0 {
3951
o.Timeout = time.Second * 30
@@ -44,13 +56,16 @@ func (o *BrowserOpts) Check() {
4456
}
4557
}
4658

59+
// Browser wraps a launched Chromium instance used by engine implementations.
4760
type Browser struct {
4861
BrowserOpts
4962
browserAddr string
5063
browser *rod.Browser
5164
CaptchaSolver *CaptchaSolver
5265
}
5366

67+
// NewBrowser launches a new Chromium process via Rod launcher and returns a
68+
// Browser wrapper configured with proxy and captcha solver settings.
5469
func NewBrowser(opts BrowserOpts) (*Browser, error) {
5570
opts.Check()
5671
logrus.Debugf("Browser options: %+v", opts)
@@ -151,7 +166,7 @@ func resolveBrowserBinaryPath(browserPath string, lookPath func() (string, bool)
151166
return "", nil
152167
}
153168

154-
// Check whether browser instance is already created
169+
// IsInitialized reports whether the browser launcher has been created.
155170
func (b *Browser) IsInitialized() bool {
156171
if b.browserAddr != "" {
157172
return true
@@ -160,7 +175,9 @@ func (b *Browser) IsInitialized() bool {
160175
}
161176
}
162177

163-
// Open URL
178+
// Navigate connects to Chromium, creates a page, applies stealth/emulation and
179+
// proxy auth, then navigates to URL. It returns an initialized page ready for
180+
// selector queries, or an error when browser setup/navigation fails.
164181
func (b *Browser) Navigate(URL string) (*rod.Page, error) {
165182
logrus.Debug("Navigate to: ", URL)
166183

@@ -280,6 +297,7 @@ func (b *Browser) Navigate(URL string) (*rod.Page, error) {
280297
return page, nil
281298
}
282299

300+
// Close closes the active browser connection.
283301
func (b *Browser) Close() error {
284302
return b.browser.Close()
285303
}

0 commit comments

Comments
 (0)