Created
September 20, 2024 06:22
-
-
Save omirobarcelo/a459f1b4fa6b47b6fb351eba477564fe to your computer and use it in GitHub Desktop.
Minimal Puppeteer Node.js code
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const puppeteer = require('puppeteer-extra'); | |
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha'); | |
const StealthPlugin = require('puppeteer-extra-plugin-stealth'); | |
const { consola } = require('consola'); | |
const fs = require('fs'); | |
const { delay } = require('../utils/async'); | |
// Runtime configuration, all read from environment variables.
const { DOCKER, SERVICE, CAPTCHA_PROVIDER, CAPTCHA_TOKEN } = process.env;

// Every log line emitted by this script carries the "[Minimal]" tag.
const logger = consola.withTag('[Minimal]');

// Options handed to the recaptcha plugin: the solving provider's credentials
// plus the plugin's visual feedback flag.
const recaptchaOptions = {
  provider: {
    id: CAPTCHA_PROVIDER,
    token: CAPTCHA_TOKEN,
  },
  visualFeedback: true,
};

// Plugins are registered in the same order as before: stealth first, then recaptcha.
puppeteer.use(StealthPlugin());
puppeteer.use(RecaptchaPlugin(recaptchaOptions));
/**
 * Waits for an element that may legitimately never show up.
 *
 * @param {object} page - Puppeteer page to query.
 * @param {string} selector - CSS selector to wait for.
 * @param {number} timeout - Maximum wait in milliseconds.
 * @returns {Promise<boolean>} `true` if the element appeared, `false` otherwise.
 */
const waitForOptionalSelector = async (page, selector, timeout) => {
  try {
    await page.waitForSelector(selector, { timeout });
    return true;
  } catch (err) {
    // A timeout is the expected "element is absent" outcome. Anything else is
    // still tolerated (this step is best-effort) but must not pass unnoticed.
    if (!err || err.name !== 'TimeoutError') {
      logger.error(`Unexpected error while waiting for ${selector}:`, err);
    }
    return false;
  }
};

/**
 * Opens the landing page of the configured SERVICE, handles the optional
 * captcha and cookie banner, dumps the page HTML to `output/page.html`, waits
 * for the service's main selector and returns the page cookies.
 *
 * Captcha solving, cookie rejection and the HTML dump are best-effort: their
 * failures are logged and the flow continues. Navigation, reading the page
 * content and the final selector wait are not guarded and reject on failure.
 *
 * @param {object} page - Puppeteer page used for the whole flow.
 * @returns {Promise<Array>} Whatever `page.cookies()` resolves with.
 */
const getBaseCookies = async page => {
  page.setDefaultTimeout(120000);

  // Google is the default target; other services override the values below.
  let url = 'https://google.com/';
  let captchaSelector;
  let cookieSelector = '#W0wltc';
  let selector = '#APjFqb';
  let delayMs = 500;
  switch (SERVICE) {
    case 'RENTALCARS':
      url = 'https://www.rentalcars.com/';
      captchaSelector = '.h-captcha';
      cookieSelector = '#onetrust-reject-all-handler';
      selector = '#searchbox-toolbox-fts-pickup';
      delayMs = DOCKER ? 180000 : 500;
      break;
    case 'GOOGLE':
      break;
    default:
      // Keep the historical fallback to Google, but say so when SERVICE was
      // set to something this script does not know.
      if (SERVICE) {
        logger.warn(`Unknown SERVICE "${SERVICE}", falling back to GOOGLE`);
      }
      break;
  }

  // Optional elements get a much shorter wait than the page default timeout.
  const optionalTimeout = DOCKER ? 60000 : 5000;

  logger.log(`Navigating to ${url}...`);
  await page.goto(url);
  logger.log(`Waiting ${delayMs}ms...`);
  await delay(delayMs);

  if (captchaSelector) {
    logger.debug(`Waiting for Captcha...`);
    if (await waitForOptionalSelector(page, captchaSelector, optionalTimeout)) {
      try {
        logger.debug(`Solving Captcha...`);
        await page.solveRecaptchas();
      } catch (err) {
        // The captcha was present, so this is a solving failure and not an
        // absence; report it as such instead of "did not appear".
        logger.error('Captcha solving failed:', err);
      }
    } else {
      logger.info('Captcha did not appear');
    }
  }

  logger.log(`Waiting for cookie banner...`);
  if (await waitForOptionalSelector(page, cookieSelector, optionalTimeout)) {
    try {
      logger.log(`Rejecting cookies...`);
      await page.click(cookieSelector);
    } catch (err) {
      logger.error('Rejecting cookies failed:', err);
    }
  } else {
    logger.info('Cookies banner did not appear');
  }

  // Snapshot the HTML before the final wait so there is something to inspect
  // even when the main selector never shows up.
  const pageSourceHTML = await page.content();
  try {
    await fs.promises.mkdir('output', { recursive: true });
    await fs.promises.writeFile(`output/page.html`, pageSourceHTML);
    logger.info(`HTML written`);
  } catch (err) {
    logger.error('Error writing to file:', err);
  }

  logger.log(`Waiting for selector...`);
  await page.waitForSelector(selector);
  return page.cookies();
};
/**
 * Script entry point: launches the browser, logs every request/response URL,
 * collects the base cookies through `getBaseCookies` and stores them as JSON
 * under `output/`. The browser is always closed, and a failed run is logged
 * and reflected in the process exit code.
 */
(async () => {
  logger.start('Starting...');

  let browserOptions = {
    headless: true,
    args: [],
  };
  if (DOCKER) {
    // Extra launch settings used only when the DOCKER variable is set.
    browserOptions = {
      ...browserOptions,
      ignoreHTTPSErrors: true,
      executablePath: '/usr/bin/google-chrome',
      args: [
        '--no-sandbox',
        '--ignore-certificate-errors',
        '--ignore-certificate-errors-spki-list',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--no-first-run',
        '--no-zygote',
        '--single-process',
      ],
    };
  }

  const browser = await puppeteer.launch(browserOptions);
  try {
    // Reuse the tab the browser starts with; open one if there is none.
    const pages = await browser.pages();
    const page = pages[0] || (await browser.newPage());

    // Trace network activity: a label line followed by the URL, per event.
    const tracedEvents = [
      ['request', 'On request'],
      ['requestfailed', 'On request failed'],
      ['response', 'On response'],
    ];
    for (const [eventName, label] of tracedEvents) {
      page.on(eventName, subject => {
        console.log(label);
        console.log(subject.url());
      });
    }

    const baseCookies = await getBaseCookies(page);

    // getBaseCookies targets Google when SERVICE is unset, so name the file
    // accordingly instead of throwing on `undefined.toLowerCase()`.
    const serviceName = (SERVICE || 'GOOGLE').toLowerCase();
    const path = `output/minimal-cookies-${serviceName}-${DOCKER ? 'docker' : 'headless'}.json`;
    try {
      await fs.promises.mkdir('output', { recursive: true });
      await fs.promises.writeFile(path, JSON.stringify(baseCookies));
      logger.info(`JSON data has been written to ${path}`);
    } catch (err) {
      logger.error('Error writing to file:', err);
    }
  } finally {
    // Runs on success and on failure so no browser process is left behind.
    logger.info('Cleaning up...');
    await browser.close();
  }
})().catch(err => {
  logger.error('Run failed:', err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment