Last active
September 22, 2019 13:13
-
-
Save larshb/404800341165bd2db19615335e73c4f9 to your computer and use it in GitHub Desktop.
Python script (wrapper for jsfiddle-downloader) for downloading and modularizing fiddles.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Settings for the fiddle scraper.
# NOTE(review): presumably this is the `config` module that the scraper script
# star-imports -- confirm the file name.
# The wildcard import is deliberate: it supplies DEBUG and basicConfig here and
# re-exports the logging helpers to any module that star-imports this one.
from logging import *

# Logging threshold handed to basicConfig below.
LOGLEVEL = DEBUG

USE_NODE = True

# Generated files listed in FILES are candidates for removal when CLEAN is set.
CLEAN = True
FILES = [
    '.html',
    '.js',
    '.css',
]

basicConfig(level=LOGLEVEL)

# Extra substitutions, keyed by a human-readable label; each value is a
# (pattern, replacement) pair.
CUSTOM_REPLACEMENTS = {
    "API reference": (
        "API = '//",
        "API = 'https://",
    ),
}
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os
import re
import shutil
from os.path import dirname, realpath, join, exists
from subprocess import check_output

from requests import get

from config import *
# Template for the page a fiddle is fetched from; ``{id}`` is replaced with the
# fiddle ID.
URL = "https://fiddle.jshell.net/{id}/show/light/"

# jQuery location substituted for the fiddle's own jQuery <script> tag.
JQUERY = 'https://ajax.googleapis.com/ajax/libs/jquery/3.4.1/jquery.min.js'

# CUSTOM_REPLACEMENTS is optional in the star-imported config. Fall back to a
# placeholder entry when the name is missing. Only NameError is caught so that
# unrelated failures (including KeyboardInterrupt) are not silently swallowed.
try:
    CUSTOM_REPLACEMENTS
except NameError:
    CUSTOM_REPLACEMENTS = {
        "Dummy replacement": (r"dummy pattern", "dummy repl"),
    }
class module:
    """Namespace of per-asset settings used when splitting a fiddle page."""

    class js:
        """Settings for the inline JavaScript block."""

        # Name of the file that receives the extracted code.
        filext = '.js'
        # Label used in log messages and as the component key.
        modtag = 'Javascript'
        # Regex whose single group captures the inline script body.
        inline = r"<script type=.text/javascript.>//<!.CDATA.(.*?)//]]></script>"
        # Tag substituted for the inline block; it references the file above.
        linked = '<script defer type="text/javascript" src="' + filext + '"></script>'

    class css:
        """Settings for the inline stylesheet block."""

        # Name of the file that receives the extracted rules.
        filext = '.css'
        # Label used in log messages and as the component key.
        modtag = 'Stylesheet'
        # Regex whose single group captures the inline style body.
        inline = r"<style.*?compiled.*?>(.*?)</style>"
        # Tag substituted for the inline block; it references the file above.
        linked = (
            '<link rel="stylesheet" id="compiled-css" type="text/css" href="'
            + filext
            + '">'
        )
class Fiddle:
    """Download a fiddle and split it into linked HTML, JS and CSS files.

    All files are written with relative names, i.e. into the current working
    directory.

    Attributes:
        id: The fiddle ID passed to the constructor.
        url: ``URL`` formatted with the fiddle ID.
        raw: The unprocessed page text; set by :meth:`load`.
        component: Maps each module tag (e.g. ``'Javascript'``) to the source
            extracted for it by :meth:`parse`.
    """

    # File holding the unprocessed download; also serves as the cache.
    RAW_FILE = '.raw'
    # File receiving the rewritten page.
    HTML_FILE = '.html'
    # Explicit encoding for every file this class reads or writes, so the
    # result does not depend on the platform's locale encoding.
    ENCODING = 'utf-8'

    def __init__(self, id, load=True, useCache=True, parse=True):
        """Set up the fiddle and optionally load and parse it right away.

        Args:
            id: Fiddle ID. A ``/`` in it is reported as a critical log message
                but processing continues.
            load: When true, call :meth:`load` immediately.
            useCache: Forwarded to :meth:`load`.
            parse: When true, call :meth:`parse` immediately.
        """
        self.component = {}
        self.id = id
        if '/' in id:
            critical('Username should be omitted from fiddle ID')
        self.url = URL.format(id=id)
        debug('URL: ' + self.url)
        if load:
            self.load(useCache=useCache)
        if parse:
            self.parse()

    def load(self, cache=True, useNpm=True, useCache=False):
        """Populate ``self.raw`` with the fiddle's page text.

        Args:
            cache: In HTTP mode, also save the downloaded text to ``RAW_FILE``.
            useNpm: Download through the jsfiddle-downloader command instead
                of a direct HTTP GET.
            useCache: In npm mode, reuse an existing ``RAW_FILE`` instead of
                downloading again.

        On a failed HTTP GET an error is logged and ``self.raw`` is left
        untouched.
        """
        if useNpm:
            if useCache and exists(self.RAW_FILE):
                info('Using cached download')
            else:
                if useCache:
                    error('Cache not found, downloading')
                debug('NPM DL allways cached')
                self.loadNpm()
            # Both the cached and the freshly downloaded case end up on disk.
            with open(self.RAW_FILE, encoding=self.ENCODING) as fh:
                self.raw = fh.read()
            return

        r = get(self.url)
        if not r.ok:
            # The reason text lives on the response object.
            error('GET error: ' + str(r.status_code) + ' ' + r.reason)
            return
        self.raw = r.text
        info('Fiddle content loaded')
        if cache:
            self._write(self.RAW_FILE, self.raw)
            info('Fiddle content saved to file')

    def loadNpm(self):
        """Run jsfiddle-downloader to save the fiddle into ``RAW_FILE``.

        Any existing ``RAW_FILE`` is removed first; a failure to remove it is
        logged and otherwise ignored.

        Raises:
            subprocess.CalledProcessError: If the command exits non-zero.
            OSError: If the command cannot be started.
        """
        node = "jsfiddle-downloader"
        info('Using Node ' + node)
        output = self.RAW_FILE
        if os.path.exists(output):
            try:
                os.remove(output)
            except OSError as err:
                error(err)
        # Pass an argument list instead of a shell string so the fiddle ID is
        # never interpreted by a shell. shutil.which resolves the launcher
        # (including PATHEXT suffixes on Windows); fall back to the bare name
        # so a missing tool still fails inside check_output.
        command = [shutil.which(node) or node, '-i', self.id, '-o', output, '-v']
        debug(f"{node} -i {self.id} -o {output} -v")
        tool_output = check_output(command).decode()
        for line in tool_output.splitlines():
            debug(node + ': ' + line)

    def parse(self, customReplacements=CUSTOM_REPLACEMENTS):
        """Rewrite ``self.raw`` and write the HTML, JS and CSS files.

        The built-in substitutions are applied first, then
        ``customReplacements`` (entries with the same label override the
        built-in ones). Afterwards each inline asset described by ``module``
        is stored in its own file and replaced in the page by a linking tag.

        Args:
            customReplacements: Mapping of label to ``(pattern, replacement)``
                pairs; patterns are regular expressions applied with
                ``re.DOTALL``.
        """
        html = self.raw
        self._write(self.RAW_FILE, html)

        replacements = {
            "Injected script": (r"<script>.*tell the embed.*?<\/script>", ""),
            "CoffeeScript todo": ('<!-- TODO: Missing CoffeeScript 2 -->', ''),
            "jQuery reference": (r"<script.*?jquery.*?<\/script>", f'<script src="{JQUERY}"></script>'),
            "jsFiddle CSS": (r"<link.*?result-light.*?>", ""),
        }
        replacements.update(customReplacements)
        for name, (pattern, repl) in replacements.items():
            info('Replacing ' + name)
            html, n = re.subn(pattern, repl, html, flags=re.DOTALL)
            if n < 1:
                warning('Unable to replace ' + name)

        # Get components
        for mod in (module.js, module.css):
            name = mod.modtag
            found = re.search(mod.inline, html, flags=re.DOTALL)
            if found:
                comp = found.group(1).lstrip()
                self.component[name] = comp
                self._write(mod.filext, comp)
                info(f"{name} stored")
            else:
                critical(f"{name} not found")
            # Swap the inline block for a tag referencing the stored file.
            html, n = re.subn(mod.inline, mod.linked, html, flags=re.DOTALL)
            if n < 1:
                error(f'Unable to inject linked {name} module')

        self._write(self.HTML_FILE, html)
        info('HTML stored')

    def _write(self, path, text):
        """Write *text* to *path* using ``ENCODING`` and close the file."""
        with open(path, 'w', encoding=self.ENCODING) as fh:
            fh.write(text)
def main(argv):
    """Scrape the fiddle named on the command line.

    Args:
        argv: Command-line arguments; ``argv[0]`` is the program name and
            ``argv[1]`` the fiddle ID.

    Returns:
        ``1`` after printing a usage message when no fiddle ID was given,
        otherwise ``0``.
    """
    # Validate the command line before anything destructive happens, so a bad
    # invocation does not wipe the output of a previous run.
    if len(argv) < 2:
        prog = argv[0] if argv else '<script>'
        print("Usage: %s [<user>/]<fiddle id>" % prog)
        return 1

    if CLEAN:
        import os
        for path in FILES:
            if exists(path):
                os.remove(path)

    # coloredlogs is optional; only its absence is tolerated here.
    try:
        import coloredlogs
        coloredlogs.install(level=LOGLEVEL)
    except ImportError:
        info("Colored logs not installed")
    debug('Debug level logging set')

    fiddle_id = argv[1]
    info("Scraping fiddle " + fiddle_id)
    # Constructing the Fiddle performs the download and the parsing.
    Fiddle(fiddle_id)
    return 0
if __name__ == '__main__':
    # Use sys.exit rather than the ``exit`` helper: that one is installed by
    # the site module for interactive use and is missing under ``python -S``.
    import sys
    sys.exit(main(sys.argv))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment