Skip to content

Instantly share code, notes, and snippets.

@shirishp
Last active February 22, 2019 21:17

Revisions

  1. shirishp revised this gist Nov 10, 2015. 1 changed file with 11 additions and 9 deletions.
    20 changes: 11 additions & 9 deletions dilbert-strip-scrapper.js
    Original file line number Diff line number Diff line change
    @@ -8,27 +8,29 @@ var finalPage = '/strip/2006-01-01';

    var nextPage = startingPage;

    function saveFile(fromUrl, fileName) {
    function downloadFileInFolder(fromUrl, fileName) {
    request(fromUrl).pipe(fs.createWriteStream(fileName));
    console.log('Saved image ' + fileName);
    }

    var afterRequest = function (error, response, body) {
    var scrapeStrip = function (error, response, body) {
    console.log('Now at ' + site + nextPage);
    if (!error) {
    var $ = cheerio.load(body);
    var comicImageUrl = $(".img-comic").attr('src');
    console.log("Downloading image from " + comicImageUrl);
    saveFile(comicImageUrl, nextPage.substr(1) + '.jpg');
    //request(comicImageUrl).pipe(fs.createWriteStream(startingPage.substr(1) + '.jpg'));
    var stripImageUrl = $(".img-comic").attr('src');
    console.log("Found strip at " + stripImageUrl);

    downloadFileInFolder(stripImageUrl, nextPage.substr(1) + '.jpg');

    nextPage = $('.nav-right>a').attr('href');
    console.log('next page is ' + nextPage);
    console.log('Next page is ' + nextPage);

    if (nextPage != finalPage) {
    request(site + nextPage, null, afterRequest);
    request(site + nextPage, null, scrapeStrip);
    }
    } else {
    console.log("We’ve encountered an error: " + error);
    }
    };

    request(site + nextPage, null, afterRequest);
    request(site + nextPage, null, scrapeStrip);
  2. shirishp created this gist Nov 9, 2015.
    34 changes: 34 additions & 0 deletions dilbert-strip-scrapper.js
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,34 @@
    var request = require("request"),
    cheerio = require("cheerio"),
    fs = require('fs');

    // Crawl settings:
    //   site         - origin prepended to every page path before it is requested
    //   startingPage - path of the first strip page to fetch
    //   finalPage    - path at which the crawl stops (compared against each "next" link)
    //   nextPage     - path of the page being processed; reassigned as the crawl advances
    var site = "http://dilbert.com",
        startingPage = "/strip/2005-01-01",
        finalPage = '/strip/2006-01-01',
        nextPage = startingPage;

    /**
     * Download a remote file and stream it to disk.
     *
     * The transfer is asynchronous: this function returns immediately and the
     * outcome is reported on the console once the write stream finishes or
     * either side fails.
     *
     * @param {string} fromUrl  URL of the file to download. A missing/empty
     *                          value is reported and skipped.
     * @param {string} fileName Destination path, handed unchanged to
     *                          fs.createWriteStream.
     */
    function saveFile(fromUrl, fileName) {
        if (!fromUrl) {
            console.log('No URL to download, skipping ' + fileName);
            return;
        }

        var download;
        try {
            // Guarded because request() may throw synchronously when it
            // cannot use the URL it is given.
            download = request(fromUrl);
        } catch (err) {
            console.log('Could not download ' + fromUrl + ': ' + err);
            return;
        }

        var output = fs.createWriteStream(fileName);
        var failed = false;

        download.on('error', function (err) {
            failed = true;
            console.log('Could not download ' + fromUrl + ': ' + err);
            // Close the file so the descriptor is not left open.
            output.end();
        });

        output.on('error', function (err) {
            failed = true;
            console.log('Could not write ' + fileName + ': ' + err);
            // Nothing can be written any more, so stop the transfer.
            download.abort();
        });

        // Only report success once everything has been flushed to the file.
        output.on('finish', function () {
            if (!failed) {
                console.log('Saved image ' + fileName);
            }
        });

        download.pipe(output);
    }

    /**
     * Response callback for a strip page request.
     *
     * Parses the page, saves the comic image under a file name derived from
     * the current page path, then follows the "next" navigation link. The
     * crawl ends when the request fails, when the page has no next link, or
     * when the next link equals finalPage (that page itself is not fetched).
     *
     * @param {*}      error    Error reported by request, falsy on success.
     * @param {Object} response Response object supplied by request (unused).
     * @param {string} body     HTML of the fetched page.
     */
    var afterRequest = function (error, response, body) {
        console.log('Now at ' + site + nextPage);

        if (error) {
            console.log("We’ve encountered an error: " + error);
            return;
        }

        var $ = cheerio.load(body);

        var comicImageUrl = $(".img-comic").attr('src');
        if (comicImageUrl) {
            console.log("Downloading image from " + comicImageUrl);
            // nextPage still holds the current page path here; drop its
            // leading "/" to turn it into a relative file name.
            saveFile(comicImageUrl, nextPage.substr(1) + '.jpg');
        } else {
            console.log('No comic image found on ' + site + nextPage);
        }

        nextPage = $('.nav-right>a').attr('href');
        if (!nextPage) {
            // Without a link there is nothing valid to request next.
            console.log('No next page link found, stopping');
            return;
        }
        console.log('next page is ' + nextPage);

        if (nextPage !== finalPage) {
            request(site + nextPage, null, afterRequest);
        }
    };

    // Start the crawl: nextPage initially holds the starting page, and
    // afterRequest issues each subsequent page request itself.
    request(site + nextPage, null, afterRequest);