Easily Write Crawlers with Req

Automatically Change IP When Request Fails (Set Proxy)
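
Some sites throttle or ban IPs that send requests too frequently. With req's retry hooks you can rotate through a pool of proxies automatically: whenever a request fails or gets rate limited, the retry goes out through the next proxy.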

import (
  "net/http"
  "time"

  "github.com/imroc/req/v3"
)

var crawlerClient = NewAutoChangeProxyClient()

var proxies = []string{
  "http://proxy.example.com:8080",
  "https://proxy.example.com:9443",
  "socks5://proxy.example.com:1080",
}

func NewAutoChangeProxyClient() *req.Client {
  client := req.C()

  client.SetTimeout(5 * time.Second).
    EnableDumpEachRequest(). // Dump each request so the raw content is available when troubleshooting.
    SetCommonRetryCount(len(proxies)). // Allow one retry per proxy in the pool.
    SetCommonRetryCondition(func(resp *req.Response, err error) bool {
      // Retry on any error, or when the server rate-limits us.
      return err != nil || resp.StatusCode == http.StatusTooManyRequests
    }).
    SetCommonRetryHook(func(resp *req.Response, err error) {
      c := client.Clone().SetProxyURL(proxies[resp.Request.RetryAttempt-1]) // Create a client with the next proxy in the pool.
      resp.Request.SetClient(c)                                             // Change the client of the request dynamically.
    })
  return client
}
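
Note that the retry count equals the number of proxies, so each proxy in the pool is tried at most once per request. RetryAttempt is 1 on the first retry, hence the -1 when indexing the proxy list, and Clone() carries over all of the common settings configured above, so only the proxy changes between attempts.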

Parse HTML with goquery to Extract Content
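
Once the page is fetched, the response body can be handed directly to goquery, whose NewDocumentFromReader accepts any io.Reader (this snippet assumes fmt and github.com/PuerkitoBio/goquery are imported):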

// Send request.
resp, err := crawlerClient.R().Get(url)
if err != nil {
  return err
}

// Pass resp.Body to goquery.
doc, err := goquery.NewDocumentFromReader(resp.Body)
if err != nil {
  // If goquery fails to parse the HTML, append the raw dump to the error message to aid troubleshooting.
  return fmt.Errorf("failed to parse html: %s, raw content:\n%s", err.Error(), resp.Dump())
}

// Parse html content.
// ...
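
What you extract depends on the target page. As a minimal illustrative sketch (the CSS selectors below are assumptions, not part of the original example), the title and links could be pulled out like this:

title := doc.Find("title").Text() // Text of the <title> element.
fmt.Println("title:", title)

// Iterate over every anchor element that has an href attribute.
doc.Find("a[href]").Each(func(i int, s *goquery.Selection) {
  if href, ok := s.Attr("href"); ok {
    fmt.Println("link:", href)
  }
})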

The complete example can be found in the req documentation: Integrate with goquery.