Skip to content

Instantly share code, notes, and snippets.

@shmohawk
Created March 30, 2018 22:54
Show Gist options
  • Save shmohawk/f6f51d585722dc1a05201ea441873702 to your computer and use it in GitHub Desktop.
Save shmohawk/f6f51d585722dc1a05201ea441873702 to your computer and use it in GitHub Desktop.
diff --git a/alfix.py b/alfix.py
index eed2f61..ee80ced 100755
--- a/alfix.py
+++ b/alfix.py
@@ -9,8 +9,11 @@ import gzip
from lxml import etree
from lxml import html
from lxml.html import builder as E
+from lxml.html import soupparser
from urllib.parse import urlparse, parse_qsl
+from subprocess import Popen, PIPE, STDOUT
+
from http import HTTPStatus
myname = 'Alfix'
@@ -84,13 +87,16 @@ def mkxform(db):
xform = {}
for r in rows:
xslpath = r['XSL']
- xslt_root = etree.parse(open('%s/%s' % (web_base, xslpath)))
+ #xslt_root = etree.parse(open('%s/%s' % (web_base, xslpath)))
# XXX hack: lxml barfs on some javascript thing from xslt, drop it
- for x in xslt_root.xpath('//script'):
- x.getparent().remove(x)
+ #for x in xslt_root.xpath('//script'):
+ # x.getparent().remove(x)
+
+ #xform[r['section_type']] = etree.XSLT(xslt_root)
- xform[r['section_type']] = etree.XSLT(xslt_root)
+ # XSL hack (https://bugs.launchpad.net/lxml/+bug/1759843): keep only the stylesheet path; callers run it through xsltproc
+ xform[r['section_type']] = '%s/%s' % (web_base, xslpath)
return xform
@@ -159,13 +165,28 @@ class myHandler(http.server.SimpleHTTPRequestHandler):
'and element.id=%s' % q.get('elemid')
], order=False)
sect_type = fetch(self.db, xsql)[0]['TYPE']
- transform = self.xform[sect_type]
+
+ # XSL hack: https://bugs.launchpad.net/lxml/+bug/1759843
+ # self.xform[sect_type] is passed to xsltproc as the stylesheet instead of being called directly.
+
+ def transform(e, codep='', cdPath='', linkUrl=''):  # e: XML document as str
+ p = Popen([
+ '/usr/bin/env', 'xsltproc', self.xform[sect_type],
+ '--param', 'codep', codep,  # values are handed to xsltproc unmodified
+ '--param', 'cdPath', cdPath,
+ '--param', 'linkUrl', linkUrl,
+ '-'], stdout=PIPE, stdin=PIPE, stderr=PIPE)  # '-': the document is fed on stdin
+ ret = p.communicate(input=e.encode('utf-8'))  # (stdout, stderr); stderr and exit status are not inspected
+ # Parse xsltproc's stdout with soupparser.
+ return soupparser.fromstring(ret[0])
+
contents = self.page.get_element_by_id('contents')
for i in fetch(self.db, sql):
- doc = etree.fromstring(i['VALUE_XML'])
+ #doc = soupparser.fromstring(i['VALUE_XML'])
+ doc = i['VALUE_XML']
res = transform(doc, codep='0', cdPath='"./"', linkUrl='""')
- tbl = res.find('//table')
+ tbl = res.find('.//table')
contents.append(tbl)
diff --git a/mdb2sql.sh b/mdb2sql.sh
index ffba647..3c4ee73 100755
--- a/mdb2sql.sh
+++ b/mdb2sql.sh
@@ -26,4 +26,5 @@ done
echo "regenerating searchable text..."
./xml2txt.py ${dbfile}
+#sqlite3 ${dbfile} "CREATE VIRTUAL TABLE fts USING fts4(ft text, id INTEGER); INSERT INTO fts SELECT value_xml,id FROM xml"
echo "done"
diff --git a/setup.sh b/setup.sh
index fdac590..7f8ccad 100755
--- a/setup.sh
+++ b/setup.sh
@@ -35,3 +35,5 @@ ln -s ../../../alfix.css app/Web/css/
perl -pi -e "s:a.getDocumentElement\(\):a.documentElement:g" app/Web/svgscript/mysvg.js
./mdb2sql.sh "$cdpath"
+
+./xsl_js.py
diff --git a/xml2txt.py b/xml2txt.py
index 2335a3f..a4401d9 100755
--- a/xml2txt.py
+++ b/xml2txt.py
@@ -2,19 +2,31 @@
import sqlite3
import sys
+from lxml.html import soupparser
from lxml import etree
from alfix import mkquery, fetch, mkxform
from alfix import web_base, dbpath
+from subprocess import Popen, PIPE, STDOUT
+
def dofoo(db, xform, row):
- transform = xform[row['TYPE']]
- doc = etree.fromstring(row['VALUE_XML'])
- res = transform(doc, codep='0', cdPath='"./"', linkUrl='""')
+ # Return the text of row['VALUE_XML'] after XSL transformation, joined by spaces (db is not used here).
+ # XSL hack: https://bugs.launchpad.net/lxml/+bug/1759843 -- xform[row['TYPE']] is handed to xsltproc as the stylesheet
+ def transform(e, codep='', cdPath='', linkUrl=''):  # e: XML document as str
+ p = Popen([
+ '/usr/bin/env', 'xsltproc', xform[row['TYPE']],
+ '--param', 'codep', codep,  # values are handed to xsltproc unmodified
+ '--param', 'cdPath', cdPath,
+ '--param', 'linkUrl', linkUrl,
+ '-'], stdout=PIPE, stdin=PIPE, stderr=PIPE)  # '-': the document is fed on stdin
+ ret = p.communicate(input=e.encode('utf-8'))  # (stdout, stderr); stderr and exit status are not inspected
+ return soupparser.fromstring(ret[0])
- txt = []
- for t in res.getroot().itertext():
- txt.append(t)
+ # VALUE_XML is passed on unparsed; transform() encodes it to UTF-8 for xsltproc.
+ doc = row['VALUE_XML']
+ res = transform(doc, codep='0', cdPath='"./"', linkUrl='""')
+ txt = [r for r in res.itertext()]
return ' '.join(txt)
diff --git a/xsl_js.py b/xsl_js.py
index e69de29..f84400e 100755
--- a/xsl_js.py
+++ b/xsl_js.py
@@ -0,0 +1,38 @@
+#!/usr/bin/python3
+"""Strip <script> elements from the XSL stylesheets listed by alfix.mkxform.
+
+Usage: xsl_js.py [DBPATH]
+
+Every path in mkxform(db).values() is read, all RE_JS matches are removed,
+and the file is written back in place.
+"""
+
+from alfix import mkxform, dbpath
+import re
+import sys
+import sqlite3
+
+# Raw string: '\s' and '\S' are invalid escapes in a plain literal
+# (DeprecationWarning since Python 3.6, SyntaxWarning since 3.12).
+# NOTE: only matches opening tags that end in '">' on a single line.
+RE_JS = re.compile(r'<script.*?">[\s\S]*?</script>')
+
+if __name__ == '__main__':
+    # An optional first argument overrides the database path from alfix.
+    if len(sys.argv) > 1:
+        dbpath = sys.argv[1]
+
+    db = sqlite3.connect(dbpath)
+    db.row_factory = sqlite3.Row
+
+    for x in mkxform(db).values():
+        # The with-blocks close the file; the old explicit close() was redundant.
+        with open(x) as o:
+            data = o.read()
+
+        data = RE_JS.sub('', data)
+
+        with open(x, 'w') as o:
+            o.write(data)
+
+    db.close()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment