This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
If you don't know the answer, try searching for it on the internet. You can do so by running the following command:
```
npx @uratmangun/scraper-tool search "<query>"
```
For example, if the query is "how to install frog.fm", run:
```
npx @uratmangun/scraper-tool search "how to install frog.fm"
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"name": "@uratmangun/scraper-tool", | |
"version": "1.0.0", | |
"description": "A CLI tool for web scraping and Google search", | |
"type": "module", | |
"bin": { | |
"@uratmangun/scraper-tool": "./bin/cli.js" | |
}, | |
"files": [ | |
"bin", |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env node | |
import { fileURLToPath } from 'url'; | |
import { dirname, join } from 'path'; | |
import { spawn } from 'child_process'; | |
// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
const __filename = fileURLToPath(import.meta.url); | |
const __dirname = dirname(__filename); | |
// Path to scripts/scrape-or-search.mjs, located one directory above this file's directory.
const scriptPath = join(__dirname, '..', 'scripts', 'scrape-or-search.mjs'); |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { chromium } from 'playwright'; | |
import { fileURLToPath } from 'url'; | |
import dotenv from 'dotenv'; | |
import { searchParser } from './convert-search.mjs'; | |
// Load environment variables from the .env.local file.
dotenv.config({ path: '.env.local' }); | |
/** | |
* Scrapes content from a given URL using Playwright with CDP connection | |
* @param {string} url - The URL to scrape |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import OpenAI from "openai" | |
import dotenv from 'dotenv'; | |
import { zodResponseFormat } from "openai/helpers/zod"; | |
import { z } from "zod"; | |
// Load environment variables from .env.local before the client below reads GEMINI_API_KEY.
dotenv.config({ path: '.env.local' }); | |
// OpenAI SDK client pointed at the `/openai/` endpoint of the Google Generative
// Language API, using the GEMINI_API_KEY environment variable as the API key.
const openai = new OpenAI({ | |
baseURL: "https://generativelanguage.googleapis.com/v1beta/openai/", | |
apiKey: process.env.GEMINI_API_KEY | |
}) | |
const SearchResult = z.array( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { chromium } from 'playwright'; | |
import dotenv from 'dotenv'; | |
// Load environment variables from .env.local; getGoogleHtml reads
// BRIGHT_PLAYWRIGHT_URL from process.env when connecting to the browser.
dotenv.config({ path: '.env.local' }); | |
async function getGoogleHtml(query = '', baseUrl = 'https://www.google.com/search?q=') { | |
const browser = await chromium.connectOverCDP(process.env.BRIGHT_PLAYWRIGHT_URL); | |
const searchUrl = baseUrl + encodeURIComponent(query); | |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import { chromium } from 'playwright'; | |
import { fileURLToPath } from 'url'; | |
import dotenv from 'dotenv'; | |
// Load environment variables from the .env.local file.
dotenv.config({ path: '.env.local' }); | |
/** | |
* Scrapes content from a given URL using Playwright with CDP connection | |
* @param {string} url - The URL to scrape | |
* @returns {Promise<string>} - The scraped content | |
*/ |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async function getAllItems(page) { | |
const products = await page.$$eval('div.list-product-items', (productElements) => { | |
return productElements.map(productElement => { | |
const titleElement = productElement.querySelector('p.product_name'); | |
const priceElement = productElement.querySelector('p.price span'); | |
const imageElement = productElement.querySelector('img'); | |
const availabilityElement = productElement.querySelector('.availability-label'); | |
const title = titleElement?.textContent?.trim() || null; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
async function getCategories(page) { | |
console.log('Navigating to https://alfagift.id/'); | |
await page.goto('https://alfagift.id/'); | |
console.log('Waiting for response...'); | |
// Wait for the specific categories menu container | |
// await page.waitForSelector('//*[@id="__layout"]/div/div[1]/header[1]/div[2]/div/div/div[1]/div[1]/div/div/div/div[2]/div/div/div'); | |
// Get and log the content of the categories container | |
const containerContent = await page.evaluate(() => { | |
const container = document.evaluate( |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import * as pw from 'playwright'; | |
// Credentials segment of the CDP URL, inserted before the '@' below. It is empty
// here. NOTE(review): presumably must be filled in before running — confirm the expected format.
const AUTH = ''; | |
// WebSocket endpoint that main() passes to chromium.connectOverCDP.
const SBR_CDP = `wss://${AUTH}@brd.superproxy.io:9222`; | |
async function main() { | |
console.log('Connecting to Scraping Browser...'); | |
const browser = await pw.chromium.connectOverCDP(SBR_CDP); | |
try { | |
console.log('Connected! Navigating...'); | |
const page = await browser.newPage(); | |
await page.goto('https://example.com', { timeout: 2 * 60 * 1000 }); |
NewerOlder