Created
March 30, 2018 22:54
-
-
Save shmohawk/f6f51d585722dc1a05201ea441873702 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
diff --git a/alfix.py b/alfix.py | |
index eed2f61..ee80ced 100755 | |
--- a/alfix.py | |
+++ b/alfix.py | |
@@ -9,8 +9,11 @@ import gzip | |
from lxml import etree | |
from lxml import html | |
from lxml.html import builder as E | |
+from lxml.html import soupparser | |
from urllib.parse import urlparse, parse_qsl | |
+from subprocess import Popen, PIPE, STDOUT | |
+ | |
from http import HTTPStatus | |
myname = 'Alfix' | |
@@ -84,13 +87,16 @@ def mkxform(db): | |
xform = {} | |
for r in rows: | |
xslpath = r['XSL'] | |
- xslt_root = etree.parse(open('%s/%s' % (web_base, xslpath))) | |
+ #xslt_root = etree.parse(open('%s/%s' % (web_base, xslpath))) | |
# XXX hack: lxml barfs on some javascript thing from xslt, drop it | |
- for x in xslt_root.xpath('//script'): | |
- x.getparent().remove(x) | |
+ #for x in xslt_root.xpath('//script'): | |
+ # x.getparent().remove(x) | |
+ | |
+ #xform[r['section_type']] = etree.XSLT(xslt_root) | |
- xform[r['section_type']] = etree.XSLT(xslt_root) | |
+ # XSL hack: https://bugs.launchpad.net/lxml/+bug/1759843 | |
+ xform[r['section_type']] = '%s/%s' % (web_base, xslpath) | |
return xform | |
@@ -159,13 +165,28 @@ class myHandler(http.server.SimpleHTTPRequestHandler): | |
'and element.id=%s' % q.get('elemid') | |
], order=False) | |
sect_type = fetch(self.db, xsql)[0]['TYPE'] | |
- transform = self.xform[sect_type] | |
+ | |
+ # XSL hack: https://bugs.launchpad.net/lxml/+bug/1759843 | |
+ #transform = self.xform[sect_type] | |
+ | |
+ def transform(e, codep='', cdPath='', linkUrl=''): | |
+ p = Popen([ | |
+ '/usr/bin/env', 'xsltproc', self.xform[sect_type], | |
+ '--param', 'codep', codep, | |
+ '--param', 'cdPath', cdPath, | |
+ '--param', 'linkUrl', linkUrl, | |
+ '-'], stdout=PIPE, stdin=PIPE, stderr=PIPE) | |
+ ret = p.communicate(input=e.encode('utf-8')) | |
+ | |
+ return soupparser.fromstring(ret[0]) | |
+ | |
contents = self.page.get_element_by_id('contents') | |
for i in fetch(self.db, sql): | |
- doc = etree.fromstring(i['VALUE_XML']) | |
+ #doc = soupparser.fromstring(i['VALUE_XML']) | |
+ doc = i['VALUE_XML'] | |
res = transform(doc, codep='0', cdPath='"./"', linkUrl='""') | |
- tbl = res.find('//table') | |
+ tbl = res.find('.//table') | |
contents.append(tbl) | |
diff --git a/mdb2sql.sh b/mdb2sql.sh | |
index ffba647..3c4ee73 100755 | |
--- a/mdb2sql.sh | |
+++ b/mdb2sql.sh | |
@@ -26,4 +26,5 @@ done | |
echo "regenerating searchable text..." | |
./xml2txt.py ${dbfile} | |
+#sqlite3 ${dbfile} "CREATE VIRTUAL TABLE fts USING fts4(ft text, id INTEGER); INSERT INTO fts SELECT value_xml,id FROM xml" | |
echo "done" | |
diff --git a/setup.sh b/setup.sh | |
index fdac590..7f8ccad 100755 | |
--- a/setup.sh | |
+++ b/setup.sh | |
@@ -35,3 +35,5 @@ ln -s ../../../alfix.css app/Web/css/ | |
perl -pi -e "s:a.getDocumentElement\(\):a.documentElement:g" app/Web/svgscript/mysvg.js | |
./mdb2sql.sh "$cdpath" | |
+ | |
+./xsl_js.py | |
diff --git a/xml2txt.py b/xml2txt.py | |
index 2335a3f..a4401d9 100755 | |
--- a/xml2txt.py | |
+++ b/xml2txt.py | |
@@ -2,19 +2,31 @@ | |
import sqlite3 | |
import sys | |
+from lxml.html import soupparser | |
from lxml import etree | |
from alfix import mkquery, fetch, mkxform | |
from alfix import web_base, dbpath | |
+from subprocess import Popen, PIPE, STDOUT | |
+ | |
def dofoo(db, xform, row): | |
- transform = xform[row['TYPE']] | |
- doc = etree.fromstring(row['VALUE_XML']) | |
- res = transform(doc, codep='0', cdPath='"./"', linkUrl='""') | |
+ #transform = xform[row['TYPE']] | |
+ # XSL hack: https://bugs.launchpad.net/lxml/+bug/1759843 | |
+ def transform(e, codep='', cdPath='', linkUrl=''): | |
+ p = Popen([ | |
+ '/usr/bin/env', 'xsltproc', xform[row['TYPE']], | |
+ '--param', 'codep', codep, | |
+ '--param', 'cdPath', cdPath, | |
+ '--param', 'linkUrl', linkUrl, | |
+ '-'], stdout=PIPE, stdin=PIPE, stderr=PIPE) | |
+ ret = p.communicate(input=e.encode('utf-8')) | |
+ return soupparser.fromstring(ret[0]) | |
- txt = [] | |
- for t in res.getroot().itertext(): | |
- txt.append(t) | |
+ #doc = soupparser.fromstring(row['VALUE_XML']) | |
+ doc = row['VALUE_XML'] | |
+ res = transform(doc, codep='0', cdPath='"./"', linkUrl='""') | |
+ txt = [r for r in res.itertext()] | |
return ' '.join(txt) | |
diff --git a/xsl_js.py b/xsl_js.py | |
index e69de29..f84400e 100755 | |
--- a/xsl_js.py | |
+++ b/xsl_js.py | |
@@ -0,0 +1,28 @@ | |
+#!/usr/bin/python3 | |
+ | |
+from alfix import mkxform, dbpath | |
+import re | |
+import sys | |
+import sqlite3 | |
+ | |
+RE_JS = re.compile('<script.*?">[\s\S]*?</script>') | |
+ | |
+if __name__ == '__main__': | |
+ if len(sys.argv) > 1: | |
+ dbpath = sys.argv[1] | |
+ | |
+ db = sqlite3.connect(dbpath) | |
+ db.row_factory = sqlite3.Row | |
+ | |
+ | |
+ for x in mkxform(db).values(): | |
+ with open(x) as o: | |
+ data = o.read() | |
+ o.close() | |
+ | |
+ data = RE_JS.sub('', data) | |
+ | |
+ with open(x, 'w') as o: | |
+ o.write(data) | |
+ o.close() | |
+ |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment