Last active
February 22, 2019 21:17
Revisions
-
shirishp revised this gist
Nov 10, 2015. 1 changed file with 11 additions and 9 deletions. There are no files selected for viewing.
// Revised version of the scraper (diff hunk @@ -8,27 +8,29 @@).
// `request`, `cheerio`, `fs`, `site` and `startingPage` are defined earlier
// in the file, above this hunk.
var finalPage = '/strip/2006-01-01';

// Path of the page currently being scraped; updated after every page.
var nextPage = startingPage;

/**
 * Streams the resource at `fromUrl` into the local file `fileName`.
 *
 * The success message is printed only once the write stream has finished,
 * and failures on either side of the pipe are reported instead of being
 * left as unhandled 'error' events.
 *
 * @param {string} fromUrl  URL of the image to download.
 * @param {string} fileName Destination path. Callers pass names such as
 *                          "strip/2005-01-01.jpg", so the "strip" directory
 *                          has to exist already.
 */
function downloadFileInFolder(fromUrl, fileName) {
    var failed = false;
    var target = fs.createWriteStream(fileName);

    target.on('error', function (err) {
        failed = true;
        console.log('Could not write ' + fileName + ': ' + err);
    });
    target.on('finish', function () {
        if (!failed) {
            console.log('Saved image ' + fileName);
        }
    });

    request(fromUrl)
        .on('error', function (err) {
            failed = true;
            console.log('Could not download ' + fromUrl + ': ' + err);
            // pipe() does not close the destination when the source fails.
            target.end();
        })
        .pipe(target);
}

/**
 * `request` callback for one strip page: saves the comic image found on the
 * page, then requests the page behind the "next" link until that link is
 * `finalPage` or is missing.
 *
 * @param {?Error} error    Error reported by `request`, if any.
 * @param {Object} response Response object (unused).
 * @param {string} body     HTML of the strip page.
 */
var scrapeStrip = function (error, response, body) {
    console.log('Now at ' + site + nextPage);

    if (error) {
        console.log("We've encountered an error: " + error);
        return;
    }

    var $ = cheerio.load(body);

    var stripImageUrl = $(".img-comic").attr('src');
    if (stripImageUrl) {
        console.log("Found strip at " + stripImageUrl);
        downloadFileInFolder(stripImageUrl, nextPage.substr(1) + '.jpg');
    } else {
        console.log('No strip image found at ' + site + nextPage);
    }

    var followingPage = $('.nav-right>a').attr('href');
    if (!followingPage) {
        // Without a link there is nothing to request; stop instead of
        // requesting site + "undefined".
        console.log('No next page link found at ' + site + nextPage + '; stopping.');
        return;
    }

    nextPage = followingPage;
    console.log('Next page is ' + nextPage);

    if (nextPage !== finalPage) {
        request(site + nextPage, null, scrapeStrip);
    }
};

request(site + nextPage, null, scrapeStrip);
shirishp created this gist
Nov 9, 2015. There are no files selected for viewing.
// Original version of the scraper (diff hunk @@ -0,0 +1,34 @@).
// Starting at `startingPage`, saves the comic image of each strip page and
// follows the page's "next" link until it points at `finalPage`.
var request = require("request"),
    cheerio = require("cheerio"),
    fs = require('fs');

var site = "http://dilbert.com";
var startingPage = "/strip/2005-01-01";
var finalPage = '/strip/2006-01-01';

// Path of the page currently being scraped; updated after every page.
var nextPage = startingPage;

/**
 * Streams the resource at `fromUrl` into the local file `fileName`.
 *
 * The success message is printed only once the write stream has finished,
 * and failures on either side of the pipe are reported instead of being
 * left as unhandled 'error' events.
 *
 * @param {string} fromUrl  URL of the image to download.
 * @param {string} fileName Destination path. Callers pass names such as
 *                          "strip/2005-01-01.jpg", so the "strip" directory
 *                          has to exist already.
 */
function saveFile(fromUrl, fileName) {
    var failed = false;
    var target = fs.createWriteStream(fileName);

    target.on('error', function (err) {
        failed = true;
        console.log('Could not write ' + fileName + ': ' + err);
    });
    target.on('finish', function () {
        if (!failed) {
            console.log('Saved image ' + fileName);
        }
    });

    request(fromUrl)
        .on('error', function (err) {
            failed = true;
            console.log('Could not download ' + fromUrl + ': ' + err);
            // pipe() does not close the destination when the source fails.
            target.end();
        })
        .pipe(target);
}

/**
 * `request` callback for one strip page: saves the comic image found on the
 * page, then requests the page behind the "next" link until that link is
 * `finalPage` or is missing.
 *
 * @param {?Error} error    Error reported by `request`, if any.
 * @param {Object} response Response object (unused).
 * @param {string} body     HTML of the strip page.
 */
var afterRequest = function (error, response, body) {
    console.log('Now at ' + site + nextPage);

    if (error) {
        console.log("We've encountered an error: " + error);
        return;
    }

    var $ = cheerio.load(body);

    var comicImageUrl = $(".img-comic").attr('src');
    if (comicImageUrl) {
        console.log("Downloading image from " + comicImageUrl);
        saveFile(comicImageUrl, nextPage.substr(1) + '.jpg');
    } else {
        console.log('No comic image found at ' + site + nextPage);
    }

    var followingPage = $('.nav-right>a').attr('href');
    if (!followingPage) {
        // Without a link there is nothing to request; stop instead of
        // requesting site + "undefined".
        console.log('No next page link found at ' + site + nextPage + '; stopping.');
        return;
    }

    nextPage = followingPage;
    console.log('next page is ' + nextPage);

    if (nextPage !== finalPage) {
        request(site + nextPage, null, afterRequest);
    }
};

request(site + nextPage, null, afterRequest);