Last active
July 15, 2020 15:29
-
-
Save noogen/5f1b581894f3cbe84f7262a231a7712c to your computer and use it in GitHub Desktop.
Process file sync with readline
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
const fs = require('fs') | |
const readline = require('readline') | |
const stream = require('stream') | |
const {promisify} = require('util') | |
const got = require('got') | |
const pipeline = promisify(stream.pipeline) | |
/**
 * Process a file line by line, finishing each line before reading the next.
 *   node readline-sync-process.js csvfile.csv index
 * - index allows you to resume processing a file
 *
 * The example below uses 'got' to download images from a server,
 * with intermittent pauses to avoid hammering the server,
 * so you don't get banned by the server's DDoS protection.
 */
// run with: node readline-sync-process.js csvfile.csv 0
// argv[2]: path of the CSV file to process (undefined when not supplied).
const file = process.argv[2];
// argv[3]: line index to resume from. An explicit radix keeps inputs such as
// "0x10" or "010" from being read as anything other than decimal; a missing
// or non-numeric argument falls back to 0.
const start = Number.parseInt(process.argv[3] || '0', 10) || 0;
/**
 * Pause for a while without blocking the event loop.
 *
 * @param {number} ms - delay handed to setTimeout, in milliseconds
 * @returns {Promise<undefined>} promise that resolves once the timer fires
 */
function sleep(ms) {
  // The executor just schedules the promise's own resolver as the timer callback.
  const armTimer = (wake) => setTimeout(wake, ms);
  return new Promise(armTimer);
}
/**
 * Read a file line by line and hand each line to `callback`, waiting for the
 * callback's promise to settle before moving on to the next line.
 *
 * @param {string} file - path of the file to read
 * @param {(line: string) => (Promise<void>|void)} callback - invoked once per line
 * @returns {Promise<void>} resolves after the last line has been processed;
 *   rejects with whatever `callback` (or the iteration) throws
 */
async function byLine(file, callback) {
  const rs = fs.createReadStream(file);
  const rl = readline.createInterface({
    input: rs,
    // Treat "\r\n" as a single line break regardless of timing between the two bytes.
    crlfDelay: Infinity,
  });
  try {
    for await (const line of rl) {
      await callback(line);
    }
  } finally {
    // Closing the interface only pauses its input; destroy the stream
    // explicitly so the file descriptor is released even when the callback
    // throws part-way through the file.
    rl.close();
    rs.destroy();
  }
}
/**
 * Walk the CSV named on the command line and download every listed image
 * into ./out/<filename>.jpg, one at a time.
 *
 * Each row is expected to look like: filename,https://example.com/file.jpg
 * Lines numbered below `start` (1-based) are skipped so an interrupted run
 * can be resumed from the last index that was logged.
 *
 * @returns {Promise<void>} rejects if no CSV path was given or a download fails
 */
const main = async () => {
  if (!file) {
    // Fail with a readable message instead of a TypeError from createReadStream(undefined).
    throw new Error('usage: node readline-sync-process.js csvfile.csv [index]');
  }

  const now = new Date();
  console.log(now, file);
  console.log('skipping to: ' + start);

  // createWriteStream does not create missing directories, so make sure the
  // output folder exists before the first download.
  fs.mkdirSync('./out', { recursive: true });

  let idx = 0;
  await byLine(file, async (line) => {
    idx++;
    if (idx < start) {
      return;
    }

    // pause 1 second after every 10th line, otherwise just 1 ms
    const timeout = ((idx % 10) === 0) ? 1000 : 1;

    // example csv file: filename,https://example.com/file.jpg
    const parts = line.split(',');
    const filename = (parts[0] || '').trim();
    const url = (parts[1] || '').trim();
    if (!filename || !url) {
      // Blank or incomplete rows would otherwise abort the whole run with a
      // got error about the missing URL.
      console.warn(idx, 'skipping malformed line', JSON.stringify(line));
      return;
    }

    console.log(idx, url, file);

    // download image, save it; a failed download rejects and stops the run
    await pipeline(
      got.stream(url),
      fs.createWriteStream('./out/' + filename + '.jpg')
    );
    await sleep(timeout);
  });
};
// Start the run; report any failure and exit non-zero instead of leaving the
// promise floating with an unhandled rejection.
main().catch((err) => {
  console.error(err);
  process.exitCode = 1;
});
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment