Skip to content

Instantly share code, notes, and snippets.

@omirobarcelo
Created September 20, 2024 06:22
Show Gist options
  • Save omirobarcelo/a459f1b4fa6b47b6fb351eba477564fe to your computer and use it in GitHub Desktop.
Save omirobarcelo/a459f1b4fa6b47b6fb351eba477564fe to your computer and use it in GitHub Desktop.
Minimal Puppeteer Node.js code
const puppeteer = require('puppeteer-extra');
const RecaptchaPlugin = require('puppeteer-extra-plugin-recaptcha');
const StealthPlugin = require('puppeteer-extra-plugin-stealth');
const { consola } = require('consola');
const fs = require('fs');
const { delay } = require('../utils/async');
// Runtime configuration, read once from the environment.
// DOCKER is only ever tested for truthiness below; SERVICE selects the target
// site; the CAPTCHA_* pair is handed to the recaptcha plugin's provider config.
const { DOCKER, SERVICE, CAPTCHA_PROVIDER, CAPTCHA_TOKEN } = process.env;

// All script output goes through a tagged consola logger.
const logger = consola.withTag('[Minimal]');

// Register the stealth plugin first, then the recaptcha plugin.
const stealthPlugin = StealthPlugin();
puppeteer.use(stealthPlugin);

const recaptchaPlugin = RecaptchaPlugin({
  provider: { id: CAPTCHA_PROVIDER, token: CAPTCHA_TOKEN },
  visualFeedback: true,
});
puppeteer.use(recaptchaPlugin);
/**
 * Loads the landing page for the configured SERVICE, handles the optional
 * captcha and cookie banner on a best-effort basis, dumps the page HTML to
 * `output/page.html`, waits for the service's main selector and returns the
 * cookies reported by the page.
 *
 * Any SERVICE value without a matching case (including an unset one) keeps the
 * Google defaults.
 *
 * @param {object} page - Puppeteer page; this function calls setDefaultTimeout,
 *   goto, waitForSelector, solveRecaptchas, click, content and cookies on it.
 * @returns {Promise<Array>} Whatever `page.cookies()` resolves to.
 * @throws Rejects if navigation, `page.content()`, the final `waitForSelector`
 *   or `page.cookies()` fails. Captcha, cookie-banner and HTML-dump failures
 *   are logged and do not reject.
 */
const getBaseCookies = async page => {
  page.setDefaultTimeout(120000);

  let url = 'https://google.com/';
  let captchaSelector;
  let cookieSelector = '#W0wltc';
  let selector = '#APjFqb';
  let delayMs = 500;
  switch (SERVICE) {
    case 'GOOGLE':
      url = 'https://google.com/';
      cookieSelector = '#W0wltc';
      selector = '#APjFqb';
      break;
    case 'RENTALCARS':
      url = 'https://www.rentalcars.com/';
      captchaSelector = '.h-captcha';
      cookieSelector = '#onetrust-reject-all-handler';
      selector = '#searchbox-toolbox-fts-pickup';
      delayMs = DOCKER ? 180000 : 500;
      break;
  }

  // Optional elements get a shorter timeout than the page default.
  const optionalTimeout = DOCKER ? 60000 : 5000;

  // Resolves to true when the element shows up in time, false otherwise.
  // Kept separate from the follow-up action so that a failing action is not
  // misreported as "element did not appear".
  const appears = async optionalSelector => {
    try {
      await page.waitForSelector(optionalSelector, { timeout: optionalTimeout });
      return true;
    } catch (_) {
      return false;
    }
  };

  logger.log(`Navigating to ${url}...`);
  await page.goto(url);
  logger.log(`Waiting ${delayMs}ms...`);
  await delay(delayMs);

  if (captchaSelector) {
    logger.debug(`Waiting for Captcha...`);
    if (await appears(captchaSelector)) {
      logger.debug(`Solving Captcha...`);
      try {
        await page.solveRecaptchas();
      } catch (err) {
        // Still best-effort, but report the real cause.
        logger.warn('Captcha solving failed:', err);
      }
    } else {
      logger.info('Captcha did not appear');
    }
  }

  logger.log(`Waiting for cookie banner...`);
  if (await appears(cookieSelector)) {
    logger.log(`Rejecting coookies...`);
    try {
      await page.click(cookieSelector);
    } catch (err) {
      logger.warn('Rejecting cookies failed:', err);
    }
  } else {
    logger.info('Cookies banner did not appear');
  }

  // Dump the page source HTML; a failed write is logged, not fatal.
  const pageSourceHTML = await page.content();
  try {
    await fs.promises.writeFile(`output/page.html`, pageSourceHTML);
    logger.info(`HTML written`);
  } catch (err) {
    logger.error('Error writing to file:', err);
  }

  logger.log(`Waiting for selector...`);
  await page.waitForSelector(selector);
  return page.cookies();
};
/**
 * Script entry point: launches the browser, logs every request/response on the
 * first page, collects the base cookies via getBaseCookies and writes them as
 * JSON to `output/minimal-cookies-<service>-<docker|headless>.json`.
 *
 * The browser is always closed, even when scraping fails, and a failure is
 * logged and reflected in the process exit code instead of surfacing as an
 * unhandled rejection.
 */
(async () => {
  logger.start('Starting...');

  let browserOptions = {
    headless: true,
    args: [],
  };
  if (DOCKER) {
    browserOptions = {
      ...browserOptions,
      ignoreHTTPSErrors: true, //!!process.env.IGNORE_HTTPS_ERRORS,
      executablePath: '/usr/bin/google-chrome', //'/usr/bin/chromium-browser',
      headless: true,
      args: [
        '--no-sandbox',
        '--ignore-certificate-errors',
        '--ignore-certificate-errors-spki-list',
        '--disable-setuid-sandbox',
        '--disable-dev-shm-usage',
        '--disable-gpu',
        '--no-first-run',
        '--no-zygote',
        '--single-process',
      ],
    };
  }

  const browser = await puppeteer.launch(browserOptions);
  try {
    const [page] = await browser.pages();

    // Trace all network traffic of the page to stdout.
    page.on('request', request => {
      console.log('On request');
      console.log(request.url());
    });
    page.on('requestfailed', request => {
      console.log('On request failed');
      console.log(request.url());
    });
    page.on('response', response => {
      console.log('On response');
      console.log(response.url());
    });

    const baseCookies = await getBaseCookies(page);

    // getBaseCookies uses the Google settings when SERVICE is unset, so name
    // the file accordingly instead of crashing on `undefined.toLowerCase()`.
    const serviceName = (SERVICE || 'GOOGLE').toLowerCase();
    const path = `output/minimal-cookies-${serviceName}-${DOCKER ? 'docker' : 'headless'}.json`;
    try {
      // Awaited so the write has finished (or failed) before cleanup starts.
      await fs.promises.writeFile(path, JSON.stringify(baseCookies));
      logger.info(`JSON data has been written to ${path}`);
    } catch (err) {
      logger.error('Error writing to file:', err);
    }
  } finally {
    logger.info('Cleaning up...');
    await browser.close();
  }
})().catch(err => {
  logger.error(err);
  process.exitCode = 1;
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment