Last active
March 3, 2019 20:38
-
-
Save diegocasmo/5b8cee2f322f0d3625c29a6e4eb90ca8 to your computer and use it in GitHub Desktop.
Text processing script to mine https://remoteok.io/ job listings' titles.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Text-mining snippet for https://remoteok.io/ job listings: paste it into the
// browser console on the listings page. It collects the text of the page's
// <h2> elements, splits it into keywords, counts how often each keyword occurs
// and logs [keyword, count] pairs sorted by descending count.
//
// `var` and function declarations are used on purpose so the snippet can be
// pasted into the same console more than once without redeclaration errors.

/**
 * Collect the text of the <h2> elements of a document.
 *
 * @param {?{getElementsByTagName: function(string): ArrayLike<{innerText: string}>}} doc
 *     Document-like object to read from; when falsy an empty list is returned
 *     (e.g. when the snippet is loaded outside a browser).
 * @param {number} [skip=3] Number of leading <h2> elements to ignore.
 *     NOTE(review): presumably the first three headings on the page are not
 *     job titles -- confirm against the current page markup.
 * @returns {string[]} The `innerText` of every remaining <h2> element.
 */
function getListingTitles(doc, skip = 3) {
  if (!doc) {
    return [];
  }
  // slice() rather than splice(): same elements, but no mutation involved.
  return Array.from(doc.getElementsByTagName('h2'))
    .slice(skip)
    .map(function (heading) { return heading.innerText; });
}

/**
 * Split titles into whitespace-separated tokens.
 *
 * @param {string[]} titles Titles to tokenize.
 * @returns {string[]} All tokens, in order of appearance, duplicates included.
 */
function tokenizeTitles(titles) {
  var tokens = [];
  titles.forEach(function (title) {
    // Split on any run of whitespace and drop empty pieces, so repeated
    // spaces or line breaks never produce an empty-string "keyword".
    title.split(/\s+/).forEach(function (token) {
      if (token !== '') {
        tokens.push(token);
      }
    });
  });
  return tokens;
}

/**
 * Count how many times each token occurs.
 *
 * @param {string[]} tokens Tokens to count.
 * @returns {Object<string, number>} Prototype-less object mapping each token
 *     to its number of occurrences.
 */
function countKeywords(tokens) {
  // A prototype-less object keeps tokens such as "constructor", "toString" or
  // "__proto__" from colliding with members inherited from Object.prototype.
  var counts = Object.create(null);
  tokens.forEach(function (token) {
    // Update in place: copying the accumulator per token would be quadratic.
    counts[token] = (counts[token] || 0) + 1;
  });
  return counts;
}

/**
 * Turn a keyword -> count object into pairs sorted by descending count.
 *
 * @param {Object<string, number>} counts Result of `countKeywords`.
 * @returns {Array<[string, number]>} `[keyword, count]` pairs, highest count first.
 */
function sortByFrequency(counts) {
  return Object.keys(counts)
    .map(function (keyword) { return [keyword, counts[keyword]]; })
    .sort(function (a, b) { return b[1] - a[1]; });
}

// Get job listings' titles
var listings = getListingTitles(typeof document !== 'undefined' ? document : null);
// Tokenize listings' titles
var keywords = tokenizeTitles(listings);
// Create an object where a key is a keyword and its value is the number of times it appears
var frequency = countKeywords(keywords);
// Log sorted keywords by descending frequency count
console.log(sortByFrequency(frequency));
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment