Created
August 3, 2019 21:15
-
-
Save stockhuman/cbdcf5c655abe87a660782a3add74d53 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// install dependencies such as this with 'npm install xlsx' | |
const xlsx = require('xlsx') | |
const { gray, cyan, red, yellow, blue, black, green } = require('ansi-colors') | |
/** | |
* USAGE | |
* | |
* node filter.js <searchterm> <file to search> <data file> | |
*/ | |
const query = process.argv[2] // the term we're looking for | |
const qFile = process.argv[3] // declares the file we're extracting terms from | |
const dFile = process.argv[4] // the file we will search in with results from query | |
const log = msg => { | |
console.log(black.bgWhite('[LOG]') + ' ' + gray(msg)) | |
} | |
const success = msg => { | |
log(green(msg)) | |
} | |
const createSpreadSheet = (data, name) => { | |
const outSheet = xlsx.utils.json_to_sheet(data) | |
// create workbook and export | |
const wb = xlsx.utils.book_new(); | |
xlsx.utils.book_append_sheet(wb, outSheet, name); | |
xlsx.writeFile(wb, `./${name}.xlsx`) | |
success(`${name}.xlsx written to disk`) | |
} | |
if (process.argv[2] == 'help') { | |
log(`${cyan('USAGE:')}`) | |
log(gray(`node filter.js ${yellow('<searchterm>')} ${blue('<file to search>')} ${red('<data file>')}`)) | |
log(`${cyan('EX:')} node filter.js ${yellow('ABC')} ${blue('DMP.xlsx')} ${red('10kMethSorted.xlsx')}`) | |
process.exit(0) | |
} | |
// feedback | |
log('Looking for ' + blue(query)) | |
let results = [] | |
let now = new Date().toLocaleTimeString('fr-ca').replace(' ', '').replace(/:/g, '') | |
let output = `${query}_${now}` | |
const wkbk = xlsx.readFile(qFile, { raw: true }) | |
wkbk.SheetNames.forEach(sheetName => { // in each sheet... | |
// convert sheet into JSON rows, this is our row array | |
const rows = xlsx.utils | |
.sheet_to_row_object_array(wkbk.Sheets[sheetName]) | |
.filter(row => | |
row['UCSC_RefGene_Name'] && | |
typeof row['UCSC_RefGene_Name'] == 'string' // filters out bad data | |
) | |
// A row looks like this | |
//{ | |
// Name: 'cg18638434', | |
// UCSC_RefGene_Name: 'C14orf45;ENTPD5', | |
// Relation_to_Island: 'S_Shore', | |
// logFC: 4.13212463502205 | |
// }, | |
// for each row, alias the data as 'patient' | |
rows.forEach(row => { | |
const { Name, Relation_to_Island, logFC, UCSC_RefGene_Name } = row | |
try { | |
if (UCSC_RefGene_Name.split(';').includes(query)) { | |
results.push({ | |
Name, | |
Relation_to_Island, | |
UCSC_RefGene_Name, | |
logFC | |
}) | |
} | |
} catch (error) { | |
log(`${red('Malformed Data:')}`) | |
console.log(row) | |
} | |
}) | |
// save results | |
if (results.length > 0) { | |
log(`Found ${yellow(results.length)} results`) | |
createSpreadSheet(results, output) | |
} | |
else { log('No results found'); process.exit(0) } | |
}) | |
// read the file we're looking into | |
const wkbkdata = xlsx.readFile(dFile) | |
log('Loaded data file') | |
wkbkdata.SheetNames.forEach(sheetName => { | |
const rows = xlsx.utils.sheet_to_row_object_array(wkbkdata.Sheets[sheetName]) | |
const out = [] | |
rows.forEach(row => { | |
for (let index = 0; index < results.length; index++) { | |
if (results[index].Name === row.Name) { | |
out.push(row) | |
} | |
} | |
}) | |
log(`Found ${yellow(out.length)} matching CGRP rows`) | |
if (out.length > 0) { | |
const e = xlsx.readFile(`./${output}.xlsx`, { raw: true }) | |
xlsx.utils.book_append_sheet(e, xlsx.utils.json_to_sheet(out), 'Matches') | |
xlsx.writeFile(e, `./${output}.xlsx`) | |
success('Data written to ' + output) | |
} | |
}) |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment