(A quick and dirty) Node V8 GC trace parser.
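
// Rough usage (an assumption, not spelled out in the gist): capture the GC trace of a
// Node process run under PM2 with V8 tracing enabled (e.g. --trace-gc, --trace-gc-verbose),
// save the log next to this script as "./gc", then run this script with node. It writes
// ./gc.json, ./gc.grouped.json and ./gc.aggregated.json relative to the working directory.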
'use strict';

const Promise = require('bluebird');
const _ = require('lodash');
const path = require('path');
const fs = Promise.promisifyAll(require('fs'));

const DEBUG = true;
const FILENAME = './gc';
const RUBBISH_THRESHOLD = 5;
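
// ALPHA_REGEX keeps only letters-and-spaces keys when tidying aggregated objects;
// LINE_REGEX strips the "PM2: [<pid> <isolate>]" prefix from each log line;
// HEAP_REGEX matches V8's "Heap growing factor ..." lines;
// CRAZY_REGEX matches the main "<time> ms: <type> <before> (<total>) -> <after> (<total>) MB, <gc> / <external>" summary lines.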
const ALPHA_REGEX = /^[a-zA-Z\s]+$/;
const LINE_REGEX = /(PM2:\s)\[([0-9:xa-f\s]+)\]\s+(.*)/;
const HEAP_REGEX = /Heap growing factor ([0-9\.]+) based on mu=([0-9\.]+), speed_ratio=([0-9]+) \(gc=([0-9]+), mutator=([0-9]+)\)/;
// The final group (the bracketed GC reason, e.g. "[allocation failure]") is optional and
// assumes the usual --trace-gc line format; without it, parsed[9] below is always undefined.
const CRAZY_REGEX = /([0-9]+)\sms:\s([A-Za-z\-]+)\s([0-9\.]+)\s\(([0-9\.]+)\)\s->\s([0-9\.]+)\s\(([0-9\.]+)\)\sMB,\s([0-9\.]+)\s\/\s([0-9\.]+)(?:\sms\s\[([^\]]*)\])?/;

const debugLine = (error, line) => {
    if (!DEBUG) {
        return;
    }

    console.error(`${error}:\n${line}`);
};

const getPath = _.partial(path.resolve, __dirname, FILENAME);

const getFileStream = _.flow(
    getPath,
    fs.createReadStream
);
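
// Buffers partial lines across stream chunks: each call maps the iterator over the
// complete lines in a chunk and keeps the trailing fragment; calling it with a falsy
// input flushes whatever is left in the buffer.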
const chunkToLines = (iterator) => {
    let remainder = '';

    return (input) => {
        if (!input) {
            return iterator(remainder);
        }

        const lines = (remainder + input).split('\n');
        remainder = lines.pop();

        return lines.map(iterator);
    };
};
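
// trimAssign(memo, 'key: value') splits the string on ':' and sets the trimmed key/value
// pair onto memo (roughly _.set(memo, key.trim(), value.trim())), returning memo.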
const trimAssign = _.flow(
    _.overArgs(_.concat, [_.identity, _.partial(_.split, _, ':')]),
    _.spread(_.overArgs(_.set, [_.identity, _.trim, _.trim]))
);
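
// Classifies a single (de-prefixed) trace line and turns it into a small keyed object:
// memory-usage dumps ("... used: ..."), GC summary lines ("... -> ..."), heap growing
// factor lines, Grow:/Dampen: heap actions and plain "key: value" lines. Anything else
// is logged (when DEBUG) and returned as-is.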
const processLine = (line) => {
    const indexOfUsed = line.indexOf('used:');

    if (indexOfUsed !== -1) {
        const indexOfMs = line.indexOf(' ms ');

        if (indexOfMs !== -1 && indexOfUsed > indexOfMs) {
            line = line.slice(indexOfMs + 4);
        }

        const parts = line.split(',');
        const type = parts.shift();

        return { [type]: _.reduce(parts, trimAssign, {}) };
    }

    if (line.indexOf('->') !== -1) {
        const parsed = CRAZY_REGEX.exec(line);

        if (parsed) {
            return {
                GC: {
                    cumulativeMs: parsed[1],
                    type: parsed[2],
                    beforeHeapMB: parsed[3],
                    beforeSystemMB: parsed[4],
                    afterHeapMB: parsed[5],
                    afterSystemMB: parsed[6],
                    timeSpendGCing: parsed[7],
                    externalTime: parsed[8],
                    gcReason: parsed[9]
                }
            };
        }
    }

    if (line.indexOf('Heap growing factor') === 0) {
        const parsed = HEAP_REGEX.exec(line);

        if (parsed) {
            return {
                'Heap growth': {
                    factor: parsed[1],
                    mu: parsed[2],
                    speedRatio: parsed[3],
                    gc: parsed[4],
                    mutator: parsed[5]
                }
            };
        }
    }

    if (line.startsWith('Grow:') || line.startsWith('Dampen:')) {
        const sansSpaces = line.split(' ');
        const type = sansSpaces.shift().slice(0, -1);
        const ratio = sansSpaces.pop().slice(1, -1);

        return {
            action: _(sansSpaces)
                .join(' ')
                .split(',')
                .reduce(trimAssign, {
                    type,
                    ratio
                })
        };
    }

    if (line.indexOf(':') !== -1) {
        if (line.split(':').length === 2) {
            return trimAssign({}, line);
        }
    }

    debugLine('UNPROCESSED LINE', line);

    return line;
};
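
// Consumes the read stream, splitting chunks into lines, stripping the PM2 prefix with
// LINE_REGEX and running each payload through processLine. Resolves (as a bluebird
// promise) with the array of { process, data } entries once the stream ends; lines that
// don't match become empty objects and are filtered out later.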
const processStream = (stream) => {
    let output = [];

    const dechunker = _.flow(chunkToLines((line) => {
        const lineParts = LINE_REGEX.exec(line);

        if (!lineParts) {
            debugLine('UNREGEXABLE LINE', line);
            return {};
        }

        return {
            process: lineParts[2],
            data: processLine(lineParts[3])
        };
    }), (out) => {
        output = output.concat(out);
    });

    return new Promise((resolve, reject) => {
        stream.on('data', dechunker);
        stream.on('end', _.flow(dechunker, () => {
            console.error('Done Parsing!');
            resolve(output);
        }));
    });
};
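
// jsonSaver(destination) returns a function that pretty-prints its argument as JSON
// (two-space indent) and writes it to destination; used as a .tap() side effect below.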
const jsonSaver = (destination) => _.flow(
    _.bind(JSON.stringify, JSON, _, _, 2),
    _.bind(fs.writeFileAsync, fs, destination)
);

const dataFlattener = _.flow(
    _.property('data'),
    _.toPairs,
    _.property(0)
);
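
// Keeps only entries that actually flattened to a [key, value] pair and whose key looks
// like real data, i.e. drops keys that start with '[' or end with ' ms' (leftover
// fragments of unparsed trace lines).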
const isNotRubbish = _.overEvery(
    _.isArray,
    _.flow(
        _.partial(_.get, _, '0'),
        _.negate(_.overSome(
            _.partial(_.startsWith, _, '['),
            _.partial(_.endsWith, _, ' ms')
        ))
    )
);

const aggregateToArrays = (memo, [key, val]) => {
    if (!memo[key]) {
        memo[key] = [];
    }

    memo[key].push(val);

    return memo;
};
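
// groupProcess collects one process's parsed entries into { type: [values...] } arrays;
// groupData buckets all entries by (space-stripped) process id and applies groupProcess
// to each bucket.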
const groupProcess = (process) => _(process)
    .map(dataFlattener)
    .filter(isNotRubbish)
    .reduce(aggregateToArrays, {});

const groupData = (data) => _(data)
    .filter('process')
    .map((item) => _.set(item, 'process', item.process.split(' ').join('')))
    .groupBy('process')
    .mapValues(groupProcess)
    .value();
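
// Helpers for the aggregation stage: toNumber coerces numeric-looking strings (falling
// back to the original string), aggAdd accumulates a value onto memo[key],
// sumObject/fixObject/fixData tidy and sum per-key object values, and discreteObject
// collects the raw per-key values so the discrete series is kept alongside the sums.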
const toNumber = (str) => {
    const num = Number(str) || parseFloat(str) || parseInt(str);

    return _.isNaN(num) ? str : num;
};

const aggAdd = (memo, key, item) => _.add(_.get(memo, key, 0), toNumber(item));

const sumObject = (summed, val, key) => {
    if (_.isString(val)) {
        return summed;
    }

    return _.set((summed || {}), key, aggAdd(summed, key, val));
};

const fixObject = (obj) => _.reduce(obj, (memo, val, key) => {
    if (ALPHA_REGEX.test(key)) {
        memo[key.split(' ').join('')] = toNumber(val);
    }

    return memo;
}, {});

const fixData = _.cond([
    [_.isObject, fixObject],
    [_.isString, toNumber]
]);

const discreteObject = (memo, item, key) => {
    if (!memo[key]) {
        memo[key] = [];
    }

    memo[key].push(item);

    return memo;
};
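
// Reducer for one data series: skips items whose type doesn't match the first item's
// (and the hacky key === '0' case noted inline), then accumulates a running count,
// per-key sums and the discrete values for either object or string items.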
const dataSummer = (key, type, memo, item) => {
    // (key === '0'): hacky remove weird set
    if (typeof item !== type || key === '0') {
        return memo;
    }

    item = fixData(item);

    const count = memo.count + 1;
    let sum, discrete;

    if (type === 'object') {
        discrete = _.reduce(item, discreteObject, (memo.discrete || {}));
        sum = _.reduce(item, sumObject, memo.sum);
    } else if (type === 'string') {
        discrete = _.concat((memo.discrete || []), item);
        sum = aggAdd(memo, 'sum', item);
    }

    return { key, count, sum, discrete };
};

const reduceToSums = (data, key) => _.reduce(
    data,
    _.partial(dataSummer, key, typeof data[0]),
    { count: 0 }
);
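
// computeMean divides the sums by the sample count (per key for object sums);
// aggregateProcess runs every series through reduceToSums, drops series that are empty
// or have no more than RUBBISH_THRESHOLD samples, and attaches the means.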
const computeMean = (data) => {
    if (typeof data.sum === 'object') {
        data.mean = _.mapValues(data.sum, _.partial(_.divide, _, data.count));
    } else {
        data.mean = _.divide(data.sum, data.count);
    }

    return data;
};

const aggregateProcess = (process) => _(process)
    .mapValues(reduceToSums)
    .filter((item) => item.sum && item.count > RUBBISH_THRESHOLD)
    .map(computeMean)
    .value();

const aggregateData = (data) => _.mapValues(data, aggregateProcess);
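
// Pipeline: read the trace file, then dump the raw parse, the per-process grouping and
// the aggregated stats to ./gc.json, ./gc.grouped.json and ./gc.aggregated.json.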
const main = _.flow(getFileStream, processStream);

main()
    .tap(jsonSaver(`${FILENAME}.json`))
    .then(groupData)
    .tap(jsonSaver(`${FILENAME}.grouped.json`))
    .then(aggregateData)
    .tap(jsonSaver(`${FILENAME}.aggregated.json`));