Created
December 9, 2016 15:35
-
-
Save juanbrujo/dee3070a30fa8cdd6fe082a54afe1c7d to your computer and use it in GitHub Desktop.
Scraping a React App using PhantomJS and Cheerio
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
var phantom = require('phantom'); | |
var Q = require('q'); | |
var cheerio = require('cheerio'); | |
var _ph, _page, _outObj; | |
var url = ABSOLUTE_URL; // change here for your React app site | |
// Scrape flow: start PhantomJS, open `url`, wait until the React app has
// rendered, grab the resulting HTML and extract one '<link|title>' entry per
// result panel with cheerio.
phantom.create().then(ph => {
  _ph = ph;
  return _ph.createPage();
}).then(page => {
  _page = page;
  return _page.open(url);
}).then(status => {
  console.log(status);
  // PhantomJS reports 'success' or 'fail'; polling a page that never loaded
  // would only end in a less helpful timeout error.
  if (status !== 'success') {
    throw new Error('Unable to open ' + url + ' (status: ' + status + ')');
  }
  return waitState(textPopulated, 3);
}).then(() => {
  return _page.property('content');
}).then(content => {
  var $ = cheerio.load(content);
  var resultados = [];
  $('.item.panel.panel-default').each(function() {
    var title = $(this).find('.title').text();
    var link = $(this).find('a').attr('href');
    resultados.push('<' + link + '|' + title + '>');
  });
  // Emit the scraped entries; previously they were built and then discarded.
  console.log(resultados);
}).catch(e => {
  console.log(e);
}).then(() => {
  // Runs after both success and failure so the PhantomJS child process is
  // always shut down; otherwise a failed run leaves it alive and Node hangs.
  return Promise.resolve(_page && _page.close()).then(() => {
    return _ph && _ph.exit();
  });
}).catch(e => console.log(e));
/**
 * State predicate for waitState: reports whether the React app mounted on
 * `#app` has rendered any text yet.
 *
 * @returns {Promise<?string>} Resolves to the outer HTML of `#app` once the
 *   element exists and contains non-whitespace text; resolves to null while
 *   the element is missing or still empty, so the caller keeps polling.
 */
function textPopulated() {
  // The callback is serialized and executed inside the PhantomJS page, so it
  // must stay ES5 and cannot reference anything from this file's scope.
  return _page.evaluate(function() {
    var app = document.querySelector('#app');
    // An empty mount node still has a non-empty outerHTML, so test the text
    // content instead; otherwise the wait ends before React has rendered.
    if (!app || !app.textContent || !app.textContent.trim()) {
      return null;
    }
    return app.outerHTML;
  });
}
/**
 * Polls an asynchronous predicate until it reports success or the time limit
 * is exceeded.
 *
 * @param {function(): (Promise<*>|*)} state - Named function whose (resolved)
 *   result is truthy once the awaited state is reached. Its `name` appears in
 *   the log and error messages.
 * @param {number} [timeout] - Time limit in seconds. When omitted (or 0) the
 *   limit is 20 seconds.
 * @returns {Promise<undefined>} Resolves once `state` yields a truthy value.
 *   Rejects with `Error('Timeout state: <name>')` when a falsy result arrives
 *   after the limit has passed, or with whatever error `state` throws or
 *   rejects with.
 */
function waitState(state, timeout) {
  console.log('Start waiting for state: ' + state.name);
  var limitTime = timeout * 1000 || 20000;
  var startTime = Date.now();
  return poll();

  function poll() {
    // Calling state() inside a then-callback turns a synchronous throw into a
    // rejection, so callers only ever have to handle the returned promise.
    return Promise.resolve().then(function() {
      return state();
    }).then(function(result) {
      if (result) {
        console.log('Reached state: ' + state.name);
        return undefined;
      }
      // The deadline is only checked after a poll completes, so the final
      // attempt always gets to finish.
      if (Date.now() - startTime > limitTime) {
        throw new Error('Timeout state: ' + state.name);
      }
      return Q.delay(50).then(poll);
    });
  }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
recognized as being quite advantageous. Thank you so much for your code. Eventually, I hope we can play penalty shooters 2 together.