Last active
January 25, 2017 17:39
-
-
Save softe1988/7dab60e5cbfdd5f860fa59d1ddb44cd9 to your computer and use it in GitHub Desktop.
Command-line program that takes user input and then returns links using Horseman and PhantomJS
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var chalk = require('chalk'), | |
CLI = require('clui'), | |
inquirer = require('inquirer'), | |
clear = require('clear'), | |
figlet = require('figlet'); | |
// Start from a blank terminal, then print the application banner as yellow ASCII art.
clear();
console.log(
  chalk.yellow(
    // The layout option figlet documents is spelled `horizontalLayout`.
    figlet.textSync('App Name', { horizontalLayout: 'full' })
  )
);
/**
 * Ask the user for a city, a two-letter state abbreviation and the search terms.
 *
 * Answers are trimmed before they are validated and stored, so surrounding
 * whitespace can neither satisfy a length check nor leak into the search query.
 *
 * @param {function(Object): *} callback - Receives the answers object
 *   (`{ city, state, query }`) once all three questions have been answered.
 * @returns {Promise} The promise produced by `inquirer.prompt(...).then(callback)`,
 *   so callers can wait for the callback or handle a rejection.
 */
function getSearchParams(callback) {
  const trim = (value) => String(value).trim();

  // Builds a validator that accepts input of at least two non-blank characters
  // and otherwise returns `message` (inquirer shows a returned string as the error).
  const atLeastTwoChars = (message) => (value) =>
    (trim(value).length >= 2 ? true : message);

  const questions = [
    {
      name: 'city',
      type: 'input',
      message: 'Please enter the name of the city you want to search:',
      filter: trim,
      validate: atLeastTwoChars("Please enter a valid city name"),
    },
    {
      name: 'state',
      type: 'input',
      message: 'Please enter a 2 letter state abbreviation:',
      filter: trim,
      // Exactly two letters: digits or punctuation are not a state abbreviation.
      validate: (value) =>
        (/^[A-Za-z]{2}$/.test(trim(value))
          ? true
          : "Please enter a 2 letter state abbreviation"),
    },
    {
      name: 'query',
      type: 'input',
      message: 'Please enter query information:',
      filter: trim,
      validate: atLeastTwoChars("Please enter terms you wish to search"),
    },
  ];

  return inquirer.prompt(questions).then(callback);
}
/* if you want to call it separately
getSearchParams(function(){ | |
const queryInfo = {arguments}; | |
return queryInfo; | |
}); | |
*/ | |
module.exports = getSearchParams; |
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var Horseman = require('node-horseman'), | |
phantom = require('phantomjs-prebuilt'), | |
CLI = require('clui'), | |
fs = require('fs'); | |
const getSearchParams = require('./commandLine'); | |
// Single Horseman instance shared by every step in this file.
// jQuery is injected into loaded pages; getLinks() uses `$` inside the page.
var horseman = new Horseman({
  injectJquery: true,
  injectBluebird: true,
  debugPort: process.env.PORT,
  webSecurity: false
});
var result = {};

// Diagnostics: echo page-level events to the console as they happen.
horseman
  .on('error', (msg, trace) => {
    console.log(msg, trace);
  })
  .on('timeout', (timeout, msg) => {
    // Only the second argument is reported, prefixed with the event name.
    console.log('timeout', msg);
  })
  .on('resourceTimeout', (msg) => {
    console.log('resourceTimeout', msg);
  })
  .on('resourceError', (msg) => {
    console.log('resourceError', msg);
  })
  .on('loadFinished', (msg) => {
    console.log('loadFinished', msg);
  })
  .on('loadStarted', (msg) => {
    console.log('loadStarted', msg);
  });
/**
 * Collect the result links from the page currently loaded in horseman.
 *
 * Each anchor matching "div.g h3.r a" becomes one pretty-printed JSON string
 * holding its `title` (link text) and `url` (href attribute).
 *
 * @returns {*} Whatever `horseman.evaluate` returns; scrape() consumes it as a
 *   thenable that yields the array of JSON strings.
 */
function getLinks(){
  return horseman.evaluate(function(){
    // Runs inside the page, where the injected jQuery `$` is available.
    return $("div.g h3.r a").map(function(){
      var anchor = $(this);
      return JSON.stringify({
        title : anchor.text(),
        url : anchor.attr("href")
      }, null, 3);
    }).get();
  });
}
/**
 * Check whether the current page contains the "#pnnext" element, the same
 * element scrape() clicks to advance to the following results page.
 *
 * @returns {*} The result of `horseman.exists`; scrape() treats it as a
 *   thenable yielding a truthy/falsy value.
 */
function hasNextPage(){
  var nextPageSelector = "#pnnext";
  return horseman.exists(nextPageSelector);
}
/**
 * Scrape the current results page and every following page, appending the
 * links to '../path/to/file.json' as comma-separated JSON objects.
 *
 * The separator is written *before* each batch after the first one, so the
 * output never ends with a dangling comma and no link has to be repeated to
 * hide one. Pages without links write nothing. A single newline closes the
 * list once there is no next page, leaving the caller to supply the
 * surrounding "[" and "]".
 *
 * @returns {Promise} Resolves when the last page has been processed. Failures
 *   are logged and the error object becomes the resolved value (best effort:
 *   the caller's chain keeps running).
 */
function scrape(){
  return scrapeFromPage(false)
    .catch(function(err) {
      console.log("Error " + err.message);
      return err;
    });
}

/**
 * Scrape one page, then recurse into the next page when there is one.
 *
 * @param {boolean} hasWrittenLinks - True once an earlier page already wrote
 *   links, meaning the next batch must be preceded by a separator.
 * @returns {Promise} Resolves after this page and all following pages are done.
 */
function scrapeFromPage(hasWrittenLinks){
  var outputPath = '../path/to/file.json';

  return getLinks()
    .then(function(newLinks){
      var links = Array.isArray(newLinks) ? newLinks : [];
      if (links.length > 0) {
        var separator = hasWrittenLinks ? ",\n" : "";
        fs.appendFileSync(outputPath, separator + links.join(",\n"));
      }
      var wroteLinks = hasWrittenLinks || links.length > 0;

      return hasNextPage()
        .then(function(hasNext){
          if (hasNext){
            return horseman
              .click("#pnnext")
              .wait(2000)
              .then(function(){
                return scrapeFromPage(wroteLinks);
              });
          }
          // Last page: finish the final line of the list.
          fs.appendFileSync(outputPath, "\n");
          return;
        });
    });
}
/**
 * Prompt for the search parameters, run the Google search and scrape the
 * results into '../path/to/file.json' as a JSON array.
 *
 * Flow: ask the questions -> open Google and submit the query -> wait for the
 * results -> start the output file with "[" -> scrape() every page -> close the
 * array with "]" and shut the browser down.
 *
 * @returns {*} The horseman chain for the whole flow. It yields after the
 *   completion message has been printed and is rejected when the search fails.
 */
function getUserInput(){
  var outputPath = '../path/to/file.json';

  // Ask first, then search with the answers.
  return horseman
    .do(getSearchParams)
    .then(function(value){
      if (value === undefined) {
        // Nothing to search for: release the browser instead of leaving it open.
        horseman.close();
        return false;
      }

      /* value is an object of the user inputs */
      var searchTerms = `${value.query} ${value.city},${value.state}`;
      // Set once "[" is on disk, so the closing "]" is only written to match it.
      var arrayOpened = false;

      return horseman
        .log(value)
        .userAgent(/*Firefox*/'*Mozilla Firefox/47.0.1 (Windows NT 6.1; WOW64; rv:27.0) Gecko/20100101 Chrome 53.0.2785.34 Internet Explorer 11.0.28 Safari 9.1.2')
        .open('https://www.google.com')
        .type('input[name="q"]', `${searchTerms} `)
        .log(`Searching for: ${searchTerms}`)
        .click('[name="btnG"]')
        .keyboardEvent('keypress', 16777221)
        .waitForSelector('div.g') //'div#MSOZoneCell_WebPartWPQ3'
        .then(function(){
          // Wrapped in a function so the write happens at this point of the
          // chain (a bare call would run while the chain is being built).
          // writeFileSync starts the file afresh: appending a second array to
          // the output of an earlier run would not be valid JSON.
          fs.writeFileSync(outputPath, "[" + "\n");
          arrayOpened = true;
        })
        .then(scrape)
        .log("Scraping Complete")
        .finally(function(){
          if (arrayOpened) {
            fs.appendFileSync(outputPath, "]" + "\n");
          }
          horseman.close();
        })
        .then(function(){
          return true;
        });
    }).then(function(searched){
      if (searched) {
        console.log("Results populated in: ../path/to/file.json");
      }
    });
}
// Entry point: runs a Google search built from the user's answers.
// horseman.do() hands its function a completion callback (getSearchParams is
// driven the same way), so call it once the prompt -> search -> scrape flow has
// settled. A failed flow is logged here rather than left as an unobserved
// rejected promise.
horseman
  .do(function(done){
    getUserInput()
      .then(function(){
        done();
      }, function(err){
        console.log("Error " + (err && err.message));
        done();
      });
  });
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment