Skip to content

Instantly share code, notes, and snippets.

@leonjza
Last active September 13, 2024 05:53
Show Gist options
  • Save leonjza/34a76df8a695e21826398a1dbb524798 to your computer and use it in GitHub Desktop.
Save leonjza/34a76df8a695e21826398a1dbb524798 to your computer and use it in GitHub Desktop.
go-rod vs chromedp

chromedp vs go-rod

An experiment comparing chromedp and go-rod when taking screenshots.

https://github.com/chromedp/chromedp vs https://github.com/go-rod/rod, using https://tranco-list.eu/

For the results below, the value shown is the `failed` field of each result: `false` means there was no error taking the screenshot.

chromedp

go run main.go ../top1mlessporn100.txt  148.24s user 30.07s system 139% cpu 2:07.53 total
❯ cat results.jsonl | jq -r .failed | sort -nr | uniq -c
 143 true
  57 false

go-rod

go run main.go ../top1mlessporn100.txt  1.08s user 1.13s system 2% cpu 1:14.32 total
❯ cat results.jsonl | jq -r .failed | sort -nr | uniq -c
  62 true
  74 false
package main
import (
"bufio"
"context"
"encoding/json"
"fmt"
"os"
"strings"
"sync"
"time"
_ "net/http/pprof"
"github.com/chromedp/cdproto/page"
"github.com/chromedp/chromedp"
)
// Tunables for the benchmark run.
var (
// workers is the number of worker goroutines main starts; main also uses it
// as the buffer size of the task channel.
workers = 20
// timeout is the per-target deadline in seconds; worker converts it with
// time.Duration(timeout)*time.Second.
timeout = 60
)
// Result is the outcome of probing a single target. Each Result is stored as
// one JSON object per line in results.jsonl.
type Result struct {
	URL    string `json:"url"`    // target that was probed
	Title  string `json:"title"`  // page title, when it could be read
	Failed bool   `json:"failed"` // true when an error was recorded for the target
	Error  string `json:"error"`  // error text; empty when no error was recorded
}

// Write appends r as a single JSON line to results.jsonl in the current
// working directory, creating the file (mode 0644) if it does not exist.
//
// The file is opened and closed on every call, so no descriptor outlives the
// call. Write panics if r cannot be marshalled or the file cannot be opened or
// written.
func (r *Result) Write() {
	line, err := json.Marshal(r)
	if err != nil {
		panic(err)
	}

	file, err := os.OpenFile("results.jsonl",
		os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		panic(err)
	}
	// Without this every call would leak one file descriptor.
	defer file.Close()

	// Emit the record and its newline in one write so a line is never split
	// across two write calls.
	if _, err := file.Write(append(line, '\n')); err != nil {
		panic(err)
	}
}
// worker consumes targets from tasks until the channel is closed. Each target
// is probed in its own tab of the browser behind ctx and the outcome is
// appended to the results file via Result.Write. wg.Done is called when the
// worker returns.
func worker(wg *sync.WaitGroup, ctx context.Context, tasks chan string) {
	defer wg.Done()

	for target := range tasks {
		result := screenshotWithChromedp(ctx, target)
		result.Write()
		if !result.Failed {
			fmt.Printf(" info | %s | done\n", target)
		}
	}
}

// screenshotWithChromedp opens a new tab derived from ctx, navigates to
// target, reads the page title and captures a JPEG screenshot (the image bytes
// are discarded). The whole sequence shares one deadline of timeout seconds.
//
// It lives in its own function so the deferred cancels run after every target;
// deferring them inside worker's loop would hold every tab context and timeout
// timer until the worker exits.
func screenshotWithChromedp(ctx context.Context, target string) *Result {
	result := &Result{URL: target}

	// Open a new tab in the same browser.
	tabCtx, closeTab := chromedp.NewContext(ctx)
	defer closeTab()
	tabCtx, stopTimer := context.WithTimeout(tabCtx, time.Duration(timeout)*time.Second)
	defer stopTimer()

	fmt.Printf(" info | %s | processing\n", target)

	// Navigate and get the title of the page.
	var title string
	if err := chromedp.Run(tabCtx,
		chromedp.Navigate(target),
		chromedp.Title(&title),
	); err != nil {
		fmt.Printf(" erro | %s | failed to navigate or get title: %s\n", target, err.Error())
		result.Failed = true
		result.Error = err.Error()
		return result
	}
	result.Title = title

	// Take the screenshot; only success or failure is recorded.
	capture := chromedp.ActionFunc(func(ctx context.Context) error {
		_, err := page.CaptureScreenshot().
			WithQuality(80).
			WithOptimizeForSpeed(true).
			WithFormat(page.CaptureScreenshotFormatJpeg).
			Do(ctx)
		return err
	})
	if err := chromedp.Run(tabCtx, capture); err != nil {
		fmt.Printf(" erro | %s | failed to take screenshot: %s\n", target, err.Error())
		result.Failed = true
		result.Error = err.Error()
		return result
	}

	return result
}
// main reads "rank,domain" lines from the file named by the first command-line
// argument, starts one browser, and fans the resulting https:// targets out to
// a pool of worker goroutines. Lines that are empty or do not split into
// exactly two comma-separated fields are skipped.
func main() {
	// Fail with a usage message instead of an index-out-of-range panic.
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "usage: %s <input-file>\n", os.Args[0])
		os.Exit(1)
	}

	file, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)

	opts := append(chromedp.DefaultExecAllocatorOptions[:],
		chromedp.Flag("disable-features", "MediaRouter"),
		chromedp.Flag("disable-client-side-phishing-detection", true),
		chromedp.Flag("disable-default-apps", true),
		chromedp.Flag("hide-scrollbars", true),
		chromedp.Flag("mute-audio", true),
		chromedp.Flag("no-default-browser-check", true),
		chromedp.Flag("no-first-run", true),
		chromedp.Flag("deny-permission-prompts", true),
	)

	// Create a browser instance (ExecAllocator) with the options set.
	allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...)
	defer cancel()

	// Use a single browser context for all tabs.
	browserCtx, browserCancel := chromedp.NewContext(allocCtx)
	defer browserCancel()

	// Make sure the browser is up before any worker tries to open a tab.
	if err := chromedp.Run(browserCtx); err != nil {
		browserCancel()
		panic(err)
	}

	tasks := make(chan string, workers)
	var wg sync.WaitGroup

	// Start the worker pool.
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go worker(&wg, browserCtx, tasks)
	}

	// Read input URLs and feed them into the task queue.
	for scanner.Scan() {
		candidate := scanner.Text()
		if candidate == "" {
			continue
		}
		lines := strings.Split(candidate, ",")
		if len(lines) != 2 {
			continue
		}
		tasks <- `https://` + lines[1]
	}
	close(tasks)
	wg.Wait()

	// A read error ends the loop above early; report it so a truncated run is
	// not mistaken for a complete one.
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, " erro | %s | failed to read input: %s\n", os.Args[1], err.Error())
	}
}
package main
import (
"bufio"
"encoding/json"
"fmt"
"os"
"strings"
"sync"
"time"
_ "net/http/pprof"
"github.com/go-rod/rod"
"github.com/go-rod/rod/lib/launcher"
"github.com/go-rod/rod/lib/proto"
"github.com/ysmood/gson"
)
// Tunables for the benchmark run.
var (
// workers is the number of worker goroutines main starts; main also uses it
// as the buffer size of the task channel.
workers = 20
// timeout is the per-target deadline in seconds; worker converts it with
// time.Duration(timeout) * time.Second.
timeout = 60
)
// Result is the outcome of probing a single target. Each Result is stored as
// one JSON object per line in results.jsonl.
type Result struct {
	URL    string `json:"url"`    // target that was probed
	Title  string `json:"title"`  // page title, when it could be read
	Failed bool   `json:"failed"` // true when an error was recorded for the target
	Error  string `json:"error"`  // error text; empty when no error was recorded
}

// Write appends r as a single JSON line to results.jsonl in the current
// working directory, creating the file (mode 0644) if it does not exist.
//
// The file is opened and closed on every call, so no descriptor outlives the
// call. Write panics if r cannot be marshalled or the file cannot be opened or
// written.
func (r *Result) Write() {
	line, err := json.Marshal(r)
	if err != nil {
		panic(err)
	}

	file, err := os.OpenFile("results.jsonl",
		os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644)
	if err != nil {
		panic(err)
	}
	// Without this every call would leak one file descriptor.
	defer file.Close()

	// Emit the record and its newline in one write so a line is never split
	// across two write calls.
	if _, err := file.Write(append(line, '\n')); err != nil {
		panic(err)
	}
}
// worker consumes targets from tasks until the channel is closed. Each target
// is probed in a fresh page of browser; when the probe got far enough to
// produce a Result it is appended to the results file via Result.Write.
// wg.Done is called when the worker returns.
func worker(wg *sync.WaitGroup, browser *rod.Browser, tasks chan string) {
	defer wg.Done()

	for target := range tasks {
		result := screenshotWithRod(browser, target)
		if result == nil {
			// Page creation or navigation failed: nothing is recorded.
			continue
		}
		result.Write()
		if !result.Failed {
			fmt.Printf(" info | %s | done\n", target)
		}
	}
}

// screenshotWithRod opens a new page, navigates to target, reads the page
// title and captures a JPEG screenshot (the image bytes are discarded). All
// page operations share one deadline of timeout seconds.
//
// It returns nil when the page could not be created or navigation failed, in
// which case nothing should be written to the results file.
func screenshotWithRod(browser *rod.Browser, target string) *Result {
	tab, err := browser.Page(proto.TargetCreateTarget{})
	if err != nil {
		fmt.Printf(" erro | %s | failed to create page for: %s\n", target, err.Error())
		return nil
	}
	// Close through the original handle, not the Timeout clone below: once the
	// clone's deadline has passed, a close issued through it would be sent on
	// an expired context and the tab could be left open.
	defer func() {
		if err := tab.Close(); err != nil {
			fmt.Printf(" warn | %s | failed to close page: %s\n", target, err.Error())
		}
	}()

	// The whole round trip must take max 1 min.
	page := tab.Timeout(time.Duration(timeout) * time.Second)

	fmt.Printf(" info | %s | processing\n", target)

	if err := page.Navigate(target); err != nil {
		fmt.Printf(" erro | %s | failed to navigate to: %s\n", target, err.Error())
		// Don't write the fail, we couldn't even probe.
		return nil
	}

	result := &Result{URL: target}

	pageInfo, err := page.Info()
	if err != nil {
		fmt.Printf(" warn | %s | could not get page title: %s\n", target, err.Error())
		result.Failed = true
		result.Error = err.Error()
		return result
	}
	result.Title = pageInfo.Title

	screenshotOptions := &proto.PageCaptureScreenshot{
		Format:           proto.PageCaptureScreenshotFormatJpeg,
		Quality:          gson.Int(80),
		OptimizeForSpeed: true,
	}
	if _, err := page.Screenshot(false, screenshotOptions); err != nil {
		fmt.Printf(" erro | %s | failed to take screenshot: %s\n", target, err.Error())
		result.Failed = true
		result.Error = err.Error()
		return result
	}

	return result
}
// main reads "rank,domain" lines from the file named by the first command-line
// argument, launches one browser, and fans the resulting https:// targets out
// to a pool of worker goroutines. Lines that are empty or do not split into
// exactly two comma-separated fields are skipped.
func main() {
	// Fail with a usage message instead of an index-out-of-range panic.
	if len(os.Args) < 2 {
		fmt.Fprintf(os.Stderr, "usage: %s <input-file>\n", os.Args[0])
		os.Exit(1)
	}

	file, err := os.Open(os.Args[1])
	if err != nil {
		panic(err)
	}
	defer file.Close()
	scanner := bufio.NewScanner(file)

	chrmLauncher := launcher.New().
		Set("disable-features", "MediaRouter").
		Set("disable-client-side-phishing-detection").
		Set("disable-default-apps").
		Set("hide-scrollbars").
		Set("mute-audio").
		Set("no-default-browser-check").
		Set("no-first-run").
		Set("deny-permission-prompts")

	controlURL, err := chrmLauncher.Launch()
	if err != nil {
		panic(err)
	}

	browser := rod.New().Trace(true).ControlURL(controlURL)
	if err := browser.Connect(); err != nil {
		panic(err)
	}
	// Runs after the workers have drained the queue; report a failed shutdown
	// rather than dropping the error.
	defer func() {
		if err := browser.Close(); err != nil {
			fmt.Fprintf(os.Stderr, " erro | browser | failed to close: %s\n", err.Error())
		}
	}()

	tasks := make(chan string, workers)
	var wg sync.WaitGroup

	// Start the worker pool.
	for i := 0; i < workers; i++ {
		wg.Add(1)
		go worker(&wg, browser, tasks)
	}

	// Read input URLs and feed them into the task queue.
	for scanner.Scan() {
		candidate := scanner.Text()
		if candidate == "" {
			continue
		}
		lines := strings.Split(candidate, ",")
		if len(lines) != 2 {
			continue
		}
		tasks <- `https://` + lines[1]
	}
	close(tasks)
	wg.Wait()

	// A read error ends the loop above early; report it so a truncated run is
	// not mistaken for a complete one.
	if err := scanner.Err(); err != nil {
		fmt.Fprintf(os.Stderr, " erro | %s | failed to read input: %s\n", os.Args[1], err.Error())
	}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment