|
package main |
|
|
|
import ( |
|
"bufio" |
|
"context" |
|
"encoding/json" |
|
"fmt" |
|
"os" |
|
"strings" |
|
"sync" |
|
"time" |
|
|
|
_ "net/http/pprof" |
|
|
|
"github.com/chromedp/cdproto/page" |
|
"github.com/chromedp/chromedp" |
|
) |
|
|
|
// workers is the size of the worker pool: main starts this many worker
// goroutines and uses the same number as the task channel's buffer size.
var workers = 20

// timeout is the time budget for a single target, in seconds. It bounds the
// navigation, title lookup and screenshot performed for one URL.
var timeout = 60
|
|
|
// Result is the outcome of processing one target URL. It is serialized as a
// single JSON object using the field tags below.
type Result struct {
	// URL is the target that was processed.
	URL string `json:"url"`
	// Title is the page title; it stays empty when processing failed before
	// the title could be read.
	Title string `json:"title"`
	// Failed reports whether any processing step returned an error.
	Failed bool `json:"failed"`
	// Error holds the text of the error that caused the failure, if any.
	Error string `json:"error"`
}
|
|
|
func (r *Result) Write() { |
|
j, err := json.Marshal(r) |
|
if err != nil { |
|
panic(err) |
|
} |
|
|
|
file, err := os.OpenFile("results.jsonl", |
|
os.O_APPEND|os.O_CREATE|os.O_WRONLY, 0644) |
|
if err != nil { |
|
panic(err) |
|
} |
|
|
|
if _, err := file.Write(append(j, '\n')); err != nil { |
|
panic(err) |
|
} |
|
} |
|
|
|
func worker(wg *sync.WaitGroup, ctx context.Context, tasks chan string) { |
|
defer wg.Done() |
|
|
|
for target := range tasks { |
|
var result = &Result{ |
|
URL: target, |
|
Failed: false, |
|
} |
|
|
|
tabCtx, cancel := chromedp.NewContext(ctx) // Open a new tab in the same browser |
|
defer cancel() |
|
|
|
tabCtx, tabCancel := context.WithTimeout(tabCtx, time.Duration(timeout)*time.Second) |
|
defer tabCancel() |
|
fmt.Printf(" info | %s | processing\n", target) |
|
|
|
// Navigate and get the title of the page |
|
var title string |
|
err := chromedp.Run(tabCtx, |
|
chromedp.Navigate(target), |
|
chromedp.Title(&title), |
|
) |
|
|
|
if err != nil { |
|
fmt.Printf(" erro | %s | failed to navigate or get title: %s\n", target, err.Error()) |
|
result.Failed = true |
|
result.Error = err.Error() |
|
result.Write() |
|
|
|
cancel() |
|
continue |
|
} |
|
result.Title = title |
|
|
|
// Take screenshot |
|
// var buf []byte |
|
err = chromedp.Run(tabCtx, |
|
chromedp.ActionFunc(func(ctx context.Context) error { |
|
var err error |
|
_, err = page.CaptureScreenshot(). |
|
WithQuality(80). |
|
WithOptimizeForSpeed(true). |
|
WithFormat(page.CaptureScreenshotFormatJpeg). |
|
Do(ctx) |
|
return err |
|
}), |
|
) |
|
|
|
if err != nil { |
|
fmt.Printf(" erro | %s | failed to take screenshot: %s\n", target, err.Error()) |
|
result.Failed = true |
|
result.Error = err.Error() |
|
result.Write() |
|
|
|
cancel() |
|
continue |
|
} |
|
|
|
// Optionally: Save screenshot to disk (if needed) |
|
// err = os.WriteFile(fmt.Sprintf("%s.jpg", target), buf, 0644) |
|
// if err != nil { |
|
// fmt.Printf(" erro | %s | failed to save screenshot: %s\n", target, err.Error()) |
|
// } |
|
|
|
result.Write() |
|
cancel() |
|
fmt.Printf(" info | %s | done\n", target) |
|
} |
|
} |
|
|
|
func main() { |
|
|
|
file, err := os.Open(os.Args[1]) |
|
if err != nil { |
|
panic(err) |
|
} |
|
defer file.Close() |
|
scanner := bufio.NewScanner(file) |
|
|
|
opts := append(chromedp.DefaultExecAllocatorOptions[:], |
|
chromedp.Flag("disable-features", "MediaRouter"), |
|
chromedp.Flag("disable-client-side-phishing-detection", true), |
|
chromedp.Flag("disable-default-apps", true), |
|
chromedp.Flag("hide-scrollbars", true), |
|
chromedp.Flag("mute-audio", true), |
|
chromedp.Flag("no-default-browser-check", true), |
|
chromedp.Flag("no-first-run", true), |
|
chromedp.Flag("deny-permission-prompts", true), |
|
) |
|
|
|
// Create a browser instance (ExecAllocator) with the options set |
|
allocCtx, cancel := chromedp.NewExecAllocator(context.Background(), opts...) |
|
defer cancel() |
|
|
|
// Use a single browser context for all tabs |
|
browserCtx, browserCancel := chromedp.NewContext(allocCtx) |
|
defer browserCancel() |
|
|
|
// make sure the browser is up |
|
if err := chromedp.Run(browserCtx); err != nil { |
|
browserCancel() |
|
panic(err) |
|
} |
|
|
|
tasks := make(chan string, workers) |
|
var wg sync.WaitGroup |
|
|
|
// Start the worker pool |
|
for i := 0; i < workers; i++ { |
|
wg.Add(1) |
|
go worker(&wg, browserCtx, tasks) |
|
} |
|
|
|
// Read input URLs and feed them into the task queue |
|
for scanner.Scan() { |
|
candidate := scanner.Text() |
|
if candidate == "" { |
|
continue |
|
} |
|
|
|
lines := strings.Split(candidate, ",") |
|
if len(lines) != 2 { |
|
continue |
|
} |
|
|
|
target := `https://` + lines[1] |
|
tasks <- target |
|
} |
|
|
|
close(tasks) |
|
|
|
wg.Wait() |
|
} |