Last active
January 11, 2022 12:16
-
-
Save srghma/ab57d62080ad052f4f0938deb3d60787 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/**
 * Installs `console.save(data, filename)`, a helper meant to be pasted into a
 * browser console: it offers `data` to the user as a file download.
 */
(function (console) {
  /**
   * Downloads `data` as a file via a temporary `<a download>` element.
   *
   * @param {*} data - Value to save. Objects (including arrays) are
   *   serialized as JSON with 4-space indentation; any other value is
   *   converted with `String()`.
   * @param {string} [filename='console.json'] - Suggested file name.
   * @returns {void}
   */
  console.save = function (data, filename) {
    // Only a missing value counts as "no data": 0, false and '' are
    // legitimate things to save.
    if (data === undefined || data === null) {
      console.error('Console.save: No data');
      return;
    }

    const targetName = filename || 'console.json';
    const text =
      typeof data === 'object' ? JSON.stringify(data, undefined, 4) : String(data);

    // `application/json` is the registered JSON media type.
    const mimeType = 'application/json';
    const blob = new Blob([text], { type: mimeType });
    const objectUrl = window.URL.createObjectURL(blob);

    const anchor = document.createElement('a');
    anchor.download = targetName;
    anchor.href = objectUrl;
    anchor.dataset.downloadurl = [mimeType, anchor.download, anchor.href].join(':');
    anchor.style.display = 'none';

    // Attach the anchor before clicking, the same way `download()` below
    // does; `click()` replaces the deprecated createEvent/initMouseEvent pair.
    (document.body || document.documentElement).append(anchor);
    anchor.click();
    anchor.remove();

    // Release the blob once the click has been handled. Revocation is
    // deferred so the URL is still valid when the browser starts the download.
    setTimeout(() => {
      window.URL.revokeObjectURL(objectUrl);
    }, 1000);
  };
})(console);
///////////////
/**
 * Returns a promise that is fulfilled (with no value) after a `setTimeout`
 * timer of `milliseconds` fires.
 */
const delay = (milliseconds) => {
  const startTimer = (done) => {
    setTimeout(done, milliseconds);
  };
  return new Promise(startTimer);
};
/**
 * Triggers a download of `url` by clicking a hidden, temporary
 * `<a download>` element appended to `document.body`.
 *
 * @param {string} url - Value assigned to the anchor's `href`.
 * @param {string} name - Value assigned to the anchor's `download` attribute.
 * @returns {Promise<void>} Settles after the temporary anchor has been removed.
 */
async function download(url, name) {
  const anchor = Object.assign(document.createElement('a'), {
    download: name,
    href: url,
  });
  anchor.style.display = 'none';

  document.body.append(anchor);
  anchor.click();

  // Chrome requires the timeout
  await delay(100);
  anchor.remove();
}
// TODO: limit is 10 in chrome simultaneous requests
/**
 * Downloads several files one after another using `download()`.
 *
 * @param {Array<{url: string, name: string}>} urlAndNames - Files to fetch;
 *   each entry supplies the URL and the file name to save it under.
 * @returns {Promise<void>} Settles once every download has been triggered
 *   and its temporary anchor removed.
 * @throws {Error} If `urlAndNames` is missing; also rejects with whatever
 *   error `download()` raises.
 */
async function multiDownload(urlAndNames) {
  if (!urlAndNames) {
    throw new Error('`urlAndNames` required');
  }

  for (const [index, { url, name }] of urlAndNames.entries()) {
    // NOTE(review): the pause grows with the index (0s, 1s, 2s, ...), so the
    // total wait is quadratic in the number of files. Kept as in the
    // original - confirm whether a fixed gap was intended.
    await delay(index * 1000);
    // Awaited so a failing download rejects this function instead of
    // surfacing as an unhandled rejection.
    await download(url, name);
  }
}
///////////////
// Browser-console step (uses fetch, DOMParser and top-level await).
// For every visualization section, fetch its index page and collect the
// thumbnail links. The result is stored in `links` as
//   [{ page, pageLinks: [{ text, href }, ...] }, ...]
// `links` is left as a plain assignment, as in the original snippet.
links = await Promise.all(
  [
    'vectorfields',
    'electrostatics',
    'magnetostatics',
    'faraday',
    'light',
  ].map(async (page) => {
    const pageHref = `https://ocw.mit.edu/courses/physics/8-02-physics-ii-electricity-and-magnetism-spring-2007/visualizations/${page}`;

    const response = await fetch(pageHref);
    // Without this check an HTTP error page would be parsed like a normal
    // page and silently produce an empty `pageLinks`.
    if (!response.ok) {
      throw new Error(`Failed to fetch ${pageHref}: ${response.status} ${response.statusText}`);
    }
    const pageText = await response.text();

    const pageDom = new DOMParser().parseFromString(pageText, 'text/html');
    // NOTE(review): `anchor.href` is resolved by the parsed document, so
    // relative hrefs may resolve against the page the console is open on
    // rather than `pageHref` - confirm the links are absolute.
    const pageLinks = Array.from(
      pageDom.documentElement.querySelectorAll('#course_inner_section .thumbnail a'),
      (anchor) => ({ text: anchor.textContent, href: anchor.href }),
    );

    return { page, pageLinks };

    // Earlier approach, kept for reference (fetched every linked page too):
    // x = await Promise.all(pageLinks.map(async x => {
    //   let href = x.href.replace(/^http/g, 'https')
    //   let t = await fetch(href)
    //   t = await t.text()
    //   return { link: href, text: t, name: x.textContent.replace('.', '') }
    // }))
  }),
);
// console.save(JSON.stringify(links, null, 2))
//////////////////////
// Node.js step: reads the link list from console.json and mirrors every
// linked visualization page with httrack into
//   <output_dir_path>/<page>/<n>-<link text>/
// `links` and `output_dir_path` are left as plain assignments, as in the
// original snippet, so any other code reading these globals keeps working.
links = require(`/home/srghma/Downloads/console.json`)
output_dir_path = '/home/srghma/Downloads/mit-magnetisc-visualizations'

// Start from a clean output directory. execFileSync passes the path as a
// single argument, so no shell parsing is involved.
require('child_process').execFileSync('rm', ['-rfd', output_dir_path], { stdio: 'inherit' })

// Plain loops instead of `map(async ...)`: all work here is synchronous, and
// inside an async callback a failing command would only reject a promise
// nobody awaits instead of stopping the script.
for (const { page, pageLinks } of links) {
  for (const [subpageIndex, { href, text }] of pageLinks.entries()) {
    const outputPath = `${output_dir_path}/${page}/${subpageIndex + 1}-${text.replace('.', '')}`
    require('mkdirp').sync(outputPath)

    // Alternatives that were tried before httrack:
    // wget --recursive --page-requisites --adjust-extension --span-hosts --convert-links --restrict-file-names=windows --no-parent -P "${outputPath}" "${href}"
    // wget --mirror --no-clobber --page-requisites --adjust-extension --span-hosts --convert-links --restrict-file-names=windows --no-parent -P "${outputPath}" "${href}"

    // Logged in shell form for readability; the actual call below bypasses
    // the shell, so scraped `href`/`text` values cannot inject commands and
    // the `*` in the filter reaches httrack unexpanded.
    console.log(`cd "${outputPath}" && httrack --continue "${href}" +web.mit.edu/*.jnlp`)
    require('child_process').execFileSync(
      'httrack',
      ['--continue', href, '+web.mit.edu/*.jnlp'],
      { cwd: outputPath, stdio: 'inherit' },
    )
  }
}
// z = x.map((x, index) => {
// // const oParser = new DOMParser();
// // const dom = oParser.parseFromString(x.text, "text/html");
// // let baseEl = dom.createElement('base');
// // baseEl.setAttribute('href', x.link);
// // dom.head.append(baseEl);
// let link = x.text.matchAll(/VISUALIZATION.*?\<a href="([^"]+)/g)
// link = (Array.from(link) || []).map(x => x[1])
// link = link[0]
// // let a = dom.documentElement.querySelectorAll('a')
// // a = Array.from(a)
// // a = a.find(x => x.attributes.href.value === link)
// // if (!a) { throw new Error() }
// // const downloadHref = a.href
// const downloadHref = new URL(link, x.link).href
// const downloadHrefExt = downloadHref.split('.').pop()
// const downloadName = `${index + 1}-${x.name}.${downloadHrefExt}`
// console.log(link, x.link)
// return { url: downloadHref, name: downloadName }
// })
// console.log(z)
// multiDownload(z)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment