Skip to content

Instantly share code, notes, and snippets.

@softe1988
Last active January 25, 2017 17:39
Show Gist options
  • Save softe1988/7dab60e5cbfdd5f860fa59d1ddb44cd9 to your computer and use it in GitHub Desktop.
Save softe1988/7dab60e5cbfdd5f860fa59d1ddb44cd9 to your computer and use it in GitHub Desktop.
Command-line program that takes user input and then returns links using Horseman and PhantomJS
var chalk = require('chalk'),
CLI = require('clui'),
inquirer = require('inquirer'),
clear = require('clear'),
figlet = require('figlet');
// Clear the terminal, then print the application banner as yellow ASCII art.
clear();
console.log(
  chalk.yellow(
    // figlet's option is spelled `horizontalLayout`; the previous spelling
    // (`horizonalLayout`) was an unknown key, so the 'full' layout was never applied.
    figlet.textSync('App Name', { horizontalLayout: 'full' })
  )
);
/**
 * Ask the user for the three search parameters: city, state and query terms.
 *
 * Every answer is trimmed (via inquirer's `filter` hook) and validated:
 * city and query need at least two non-blank characters, and state must be
 * exactly two letters.
 *
 * @param {function(Object): *} callback - Called with the answers object
 *   (keys `city`, `state`, `query`) once all questions have been answered.
 * @returns {Promise} The prompt promise with `callback` chained onto it, so
 *   callers may also wait on it or attach their own error handling.
 */
function getSearchParams(callback) {
  // Normalise raw input; null/undefined is treated as an empty answer.
  const trimInput = (value) => (value == null ? '' : String(value)).trim();

  // Build a validator that requires `minLength` non-blank characters.
  // Validators trim on their own so they do not depend on `filter` running first.
  const requireMinLength = (minLength, errorMessage) => (value) =>
    trimInput(value).length >= minLength ? true : errorMessage;

  const questions = [
    {
      name: 'city',
      type: 'input',
      message: 'Please enter the name of the city you want to search:',
      filter: trimInput,
      validate: requireMinLength(2, "Please enter a valid city name"),
    },
    {
      name: 'state',
      type: 'input',
      message: 'Please enter a 2 letter state abbreviation:',
      filter: trimInput,
      // A length check alone accepted inputs such as "12" or two spaces.
      validate: (value) =>
        /^[A-Za-z]{2}$/.test(trimInput(value))
          ? true
          : "Please enter a 2 letter state abbreviation",
    },
    {
      name: 'query',
      type: 'input',
      message: 'Please enter query information:',
      filter: trimInput,
      validate: requireMinLength(2, "Please enter terms you wish to search"),
    },
  ];

  return inquirer.prompt(questions).then(callback);
}
/* if you want to call it separately
getSearchParams(function(){
const queryInfo = {arguments};
return queryInfo;
});
*/
// Expose getSearchParams as this module's export.
module.exports = getSearchParams;
var Horseman = require('node-horseman'),
phantom = require('phantomjs-prebuilt'),
CLI = require('clui'),
fs = require('fs');
const getSearchParams = require('./commandLine');
// Settings handed to the Horseman constructor; the debug port is read from
// the PORT environment variable.
var horsemanOptions = {
  injectJquery: true,
  injectBluebird: true,
  debugPort: process.env.PORT,
  webSecurity: false
};

// Single Horseman instance used by the functions in this file.
var horseman = new Horseman(horsemanOptions);

// Starts out empty; not referenced by the code visible in this file.
var result = {};
//error logging

/**
 * Build a listener that logs the event name followed by every argument the
 * event was emitted with.
 *
 * The previous `timeout` handler logged only its second parameter; the
 * Horseman README appears to document a single `msg` argument for that event
 * (TODO confirm), which would have dropped the message. Logging all arguments
 * avoids depending on the exact arity of any event.
 *
 * @param {string} eventName - Label printed before the event arguments.
 * @returns {function(...*): void} Listener suitable for `horseman.on`.
 */
function logHorsemanEvent(eventName) {
  return function (...eventArgs) {
    console.log(eventName, ...eventArgs);
  };
}

horseman
  .on('error', function (msg, trace) {
    console.log(msg, trace);
  })
  .on('timeout', logHorsemanEvent('timeout'))
  .on('resourceTimeout', logHorsemanEvent('resourceTimeout'))
  .on('resourceError', logHorsemanEvent('resourceError'))
  .on('loadFinished', logHorsemanEvent('loadFinished'))
  .on('loadStarted', logHorsemanEvent('loadStarted'));
/**
 * Collect the links matched by "div.g h3.r a" on the page currently loaded
 * in Horseman.
 *
 * @returns {*} Whatever `horseman.evaluate` returns; `scrape` chains `.then`
 *   on it and receives an array with one pretty-printed JSON string
 *   (`title`, `url`) per matched anchor.
 */
function getLinks() {
  // Handed to horseman.evaluate, so it runs in the browser: it relies only on
  // the page's `$` and deliberately sticks to plain ES5 syntax.
  var collectLinks = function () {
    var collected = [];
    $("div.g h3.r a").each(function (index, anchor) {
      var $anchor = $(anchor);
      var entry = {
        title: $anchor.text(),
        url: $anchor.attr("href")
      };
      collected.push(JSON.stringify(entry, null, 3));
    });
    return collected;
  };

  return horseman.evaluate(collectLinks);
}
/**
 * Check whether the current page contains the "#pnnext" element.
 *
 * @returns {*} The result of `horseman.exists`; `scrape` chains `.then` on it
 *   and treats the resolved value as a boolean.
 */
function hasNextPage() {
  var nextPageSelector = "#pnnext";
  return horseman.exists(nextPageSelector);
}
/**
 * Append the links of the current results page, and of every following page
 * reachable through "#pnnext", to the output file.
 *
 * Only the comma-separated array elements are written; the caller writes the
 * surrounding "[" and "]". Separators are emitted *before* each batch once
 * something has been written, so the output never ends in a dangling comma
 * and no link has to be written twice to hide one (as happened before), even
 * when a page yields no links.
 *
 * @returns {Promise} Resolves with `undefined` when the last page has been
 *   processed. On failure the error is logged and the promise resolves with
 *   the error object instead of rejecting (best-effort behaviour kept from
 *   the original implementation).
 */
function scrape() {
  const outputPath = '../path/to/file.json';

  // Process one page, then follow the "next" link if there is one.
  // `wroteAny` tells whether an earlier page already wrote at least one link.
  function scrapePage(wroteAny) {
    return getLinks().then(function (newLinks) {
      const links = newLinks || [];
      let hasOutput = wroteAny;

      if (links.length > 0) {
        const separator = hasOutput ? ',\n' : '';
        fs.appendFileSync(outputPath, separator + links.join(',\n'));
        hasOutput = true;
      }

      return hasNextPage().then(function (hasNext) {
        if (hasNext) {
          return horseman
            .click("#pnnext")
            .wait(2000)
            .then(function () {
              return scrapePage(hasOutput);
            });
        }
        // Last page: finish the final element's line before the caller closes the array.
        if (hasOutput) {
          fs.appendFileSync(outputPath, '\n');
        }
        return undefined;
      });
    });
  }

  return scrapePage(false).catch(function (err) {
    console.log("Error " + err.message);
    return err;
  });
}
/**
 * Prompt for the search parameters, run the Google search in Horseman and
 * write the scraped links to the output file as a JSON array.
 *
 * @returns {*} The Horseman promise chain; it settles after the final
 *   "Results populated" message has been logged.
 */
function getUserInput() {
  const outputPath = '../path/to/file.json';
  const userAgent = '*Mozilla Firefox/47.0.1 (Windows NT 6.1; WOW64; rv:27.0) Gecko/20100101 Chrome 53.0.2785.34 Internet Explorer 11.0.28 Safari 9.1.2';

  // Get the user input first, then search with the collected values.
  return horseman
    .do(getSearchParams)
    .then(function (value) {
      // `value` is the object of user inputs; without it there is nothing to search.
      if (value === undefined) {
        return undefined;
      }

      const searchPhrase = `${value.query} ${value.city},${value.state}`;

      return horseman
        .log(value)
        .userAgent(userAgent)
        .open('https://www.google.com')
        .type('input[name="q"]', `${searchPhrase} `)
        .log(`Searching for: ${searchPhrase}`)
        .click('[name="btnG"]')
        // presumably the key code for Enter, submitting the form — TODO confirm
        .keyboardEvent('keypress', 16777221)
        .waitForSelector('div.g')
        // Must be wrapped in a function: passing the *result* of
        // fs.appendFileSync(...) to .then() ran the write immediately while the
        // chain was being built, not after the results page had loaded.
        .then(function () {
          fs.appendFileSync(outputPath, "[" + "\n");
        })
        .then(scrape)
        .log("Scraping Complete")
        // Always close the JSON array and shut Horseman down, even on failure.
        .finally(function () {
          fs.appendFileSync(outputPath, "]" + "\n");
          horseman.close();
        });
    })
    .then(function () {
      console.log("Results populated in: ../path/to/file.json");
    });
}
//runs a google search with a specific query string
// Called directly instead of through horseman.do(): .do() hands its function a
// completion callback, which getUserInput never invokes, so that outer step
// could never finish. The returned chain also gets a rejection handler so a
// failure is reported rather than left unhandled.
getUserInput().catch(function (err) {
  console.log("Error " + (err && err.message ? err.message : err));
});
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment