Skip to content

Instantly share code, notes, and snippets.

View uratmangun's full-sized avatar
⛰️
bismillah

uratmangun.ovh uratmangun

⛰️
bismillah
View GitHub Profile
If you don't know the answer, try searching for it on the internet. You can do so by running the following command:
```
npx @uratmangun/scraper-tool search "<query>"
```
Let's say the query is "how to install frog.fm".
```
npx @uratmangun/scraper-tool search "how to install frog.fm"
{
"name": "@uratmangun/scraper-tool",
"version": "1.0.0",
"description": "A CLI tool for web scraping and Google search",
"type": "module",
"bin": {
"@uratmangun/scraper-tool": "./bin/cli.js"
},
"files": [
"bin",
#!/usr/bin/env node
import { fileURLToPath } from 'url';
import { dirname, join } from 'path';
import { spawn } from 'child_process';
// ES modules do not provide __filename/__dirname, so rebuild them from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = dirname(__filename);
// Location of scrape-or-search.mjs: up one level from this file's directory, then into scripts/.
const scriptPath = join(__dirname, '..', 'scripts', 'scrape-or-search.mjs');
import { chromium } from 'playwright';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
import { searchParser } from './convert-search.mjs';
dotenv.config({ path: '.env.local' });
/**
* Scrapes content from a given URL using Playwright with CDP connection
* @param {string} url - The URL to scrape
import OpenAI from "openai"
import dotenv from 'dotenv';
import { zodResponseFormat } from "openai/helpers/zod";
import { z } from "zod";
dotenv.config({ path: '.env.local' });
// OpenAI SDK client pointed at the generativelanguage.googleapis.com OpenAI-style base URL
// instead of the SDK default; the API key is read from the GEMINI_API_KEY environment variable
// (dotenv loads .env.local just above).
const openai = new OpenAI({
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/",
apiKey: process.env.GEMINI_API_KEY
})
const SearchResult = z.array(
import { chromium } from 'playwright';
import dotenv from 'dotenv';
// Load environment variables
dotenv.config({ path: '.env.local' });
async function getGoogleHtml(query = '', baseUrl = 'https://www.google.com/search?q=') {
const browser = await chromium.connectOverCDP(process.env.BRIGHT_PLAYWRIGHT_URL);
const searchUrl = baseUrl + encodeURIComponent(query);
import { chromium } from 'playwright';
import { fileURLToPath } from 'url';
import dotenv from 'dotenv';
dotenv.config({ path: '.env.local' });
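/**
* Reads product details (title, price, image, availability) from every
* `div.list-product-items` element on the given page.
* NOTE(review): the earlier text here described a URL-based scraper returning a string,
* which does not match this function's `page` parameter.
* @param {object} page - Page handle exposing `$$eval` (presumably a Playwright Page; confirm)
*/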
async function getAllItems(page) {
const products = await page.$$eval('div.list-product-items', (productElements) => {
return productElements.map(productElement => {
const titleElement = productElement.querySelector('p.product_name');
const priceElement = productElement.querySelector('p.price span');
const imageElement = productElement.querySelector('img');
const availabilityElement = productElement.querySelector('.availability-label');
const title = titleElement?.textContent?.trim() || null;
async function getCategories(page) {
console.log('Navigating to https://alfagift.id/');
await page.goto('https://alfagift.id/');
console.log('Waiting for response...');
// Wait for the specific categories menu container
// await page.waitForSelector('//*[@id="__layout"]/div/div[1]/header[1]/div[2]/div/div/div[1]/div[1]/div/div/div/div[2]/div/div/div');
// Get and log the content of the categories container
const containerContent = await page.evaluate(() => {
const container = document.evaluate(
import * as pw from 'playwright';
// Value interpolated into the userinfo part of the WebSocket URL below; left empty in this file.
// NOTE(review): presumably must be filled with proxy credentials before connecting; confirm.
const AUTH = '';
// WebSocket endpoint on brd.superproxy.io:9222 that main() passes to chromium.connectOverCDP.
const SBR_CDP = `wss://${AUTH}@brd.superproxy.io:9222`;
async function main() {
console.log('Connecting to Scraping Browser...');
const browser = await pw.chromium.connectOverCDP(SBR_CDP);
try {
console.log('Connected! Navigating...');
const page = await browser.newPage();
await page.goto('https://example.com', { timeout: 2 * 60 * 1000 });