Created
October 14, 2014 21:39
-
-
Save xavivars/38ecea31809d72081a81 to your computer and use it in GitHub Desktop.
APY unknown performance
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3
# vim: set ts=4 sw=4 sts=4 et :
import re
import sqlite3
from datetime import datetime

# Shared SQLite connection used by noteUnknownToken(); stays None until the
# first token is recorded, at which point it is opened lazily.
missingFreqsDBConn = None

# Matches a '*' followed by one or more characters that are not '.', ',',
# ';', ':', tab, '*' or space. Group 1 captures that run, i.e. the token
# without its leading asterisk.
unknownMarkRE = re.compile(r'\*([^.,;:\t\* ]+)')
def stripUnknownMarks(text):
    """Return *text* with the '*' removed from every marked token.

    Each match of the module-level ``unknownMarkRE`` is replaced by its
    first capture group, so ``'*word'`` becomes ``'word'``. A timestamp is
    printed to stdout before and after the substitution as ad-hoc timing
    output.
    """
    # print() calls: the shebang requests python3, where the old print
    # statement is a SyntaxError. The emitted text is unchanged.
    print("[ str: ", datetime.now())
    stripped = unknownMarkRE.sub(r'\1', text)
    print("] str: ", datetime.now())
    return stripped
def noteUnknownTokens(text, pair, dbPath='this.db'):
    """Record every '*'-marked token found in *text* for *pair*.

    Args:
        text: String to scan with the module-level ``unknownMarkRE``.
        pair: Identifier stored alongside each token (the script passes
            ``'spa-cat'``).
        dbPath: SQLite database path forwarded to ``noteUnknownToken``.
            Defaults to ``'this.db'``, the value that was previously
            hard-coded, so existing two-argument calls behave the same.

    Timestamps and the inputs are printed to stdout as ad-hoc timing
    output. Repeated tokens are passed on once per occurrence.
    """
    print("[ re: ", datetime.now())
    print(pair, text)
    for token in unknownMarkRE.findall(text):
        print("->re: ", datetime.now())
        noteUnknownToken(token, pair, dbPath)
    print("] re: ", datetime.now())
def noteUnknownToken(token, pair, dbPath):
    """Increment the stored frequency of *token* for *pair*.

    Args:
        token: The unknown token to count.
        pair: Identifier the token is counted under.
        dbPath: SQLite database path. It is only used on the call that
            opens the shared connection; once ``missingFreqsDBConn`` is
            set, later calls reuse it regardless of this argument.

    The ``missingFreqs`` table is created if missing, the (pair, token) row
    is inserted or replaced with its previous frequency plus one (starting
    from 0 when there is no row), and the change is committed before
    returning. Timestamps are printed to stdout as ad-hoc timing output.
    """
    global missingFreqsDBConn

    print("[ sql: ", datetime.now())
    if missingFreqsDBConn is None:
        missingFreqsDBConn = sqlite3.connect(dbPath)

    c = missingFreqsDBConn.cursor()
    c.execute('CREATE TABLE IF NOT EXISTS missingFreqs (pair TEXT, token TEXT, frequency INTEGER, UNIQUE(pair, token))')
    # UNIQUE(pair, token) makes INSERT OR REPLACE overwrite the existing row;
    # the COALESCE sub-select supplies the old count (or 0) to add 1 to.
    c.execute('INSERT OR REPLACE INTO missingFreqs VALUES (:pair, :token, COALESCE((SELECT frequency FROM missingFreqs WHERE pair=:pair AND token=:token), 0) + 1)', {'pair': pair, 'token': token})
    # NOTE(review): one commit per token; presumably a significant part of
    # the per-call time the surrounding timestamps measure -- confirm by
    # profiling before batching.
    missingFreqsDBConn.commit()
    print("] sql: ", datetime.now())
# Sample input for the manual run below: four '*'-marked tokens, three of
# them identical.
longText = '*sampleWord *sampleWord *sampleWord *sampleWsord'

if __name__ == '__main__':
    # Record the marked tokens for the 'spa-cat' pair, then print the text
    # with the marks stripped. print() is required under the python3 shebang.
    noteUnknownTokens(longText, 'spa-cat')
    print(stripUnknownMarks(longText))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment