Created
September 29, 2019 16:43
-
-
Save bkatiemills/82eab3650e75d90733c2b06a79ddbf19 to your computer and use it in GitHub Desktop.
fun with byte serialization
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import io, pickle, sqlite3, sys, pandas | |
import numpy as np | |
def parse(results):
    '''
    run unpack_qc over every element of results and hand back the outcome;
    results must expose .apply (the script passes a pandas column).
    '''
    decoded = results.apply(unpack_qc)
    return decoded
def unpack_qc(value):
    '''
    turn one serialized blob (bytes in numpy's .npy format) back into an array;
    the raw value and its type are echoed to stdout first as a debugging aid.
    '''
    print(value, type(value))
    # np.load wants a file-like object, so wrap the raw bytes in a buffer
    buffer = io.BytesIO(value)
    return np.load(buffer)
def pack_array(arr):
    '''
    serialize a numpy array into .npy bytes, wrapped with sqlite3.Binary so it
    can be bound to a BLOB column;
    any input that is not a numpy array produces an empty blob.
    '''
    out = io.BytesIO()
    # isinstance instead of an exact type match: ndarray subclasses used to
    # fall through and were silently stored as an empty blob
    if isinstance(arr, np.ndarray):
        np.save(out, arr)
    # getvalue() returns the whole buffer regardless of the stream position
    return sqlite3.Binary(out.getvalue())
def dbinteract(command, values=None, tries=0):
    '''
    execute the given SQL command against serialize.db;
    catch errors and retry a maximum number of times;

    command: SQL text, optionally containing ? placeholders
    values: parameters bound to the placeholders (defaults to none)
    tries: number of attempts already made; callers normally leave this at 0

    returns the fetched rows (a list, empty for statements that yield no rows),
    None if the rows could not be fetched, or -1 once every retry has failed.
    '''
    max_retry = 100
    # None stands in for "no parameters" so no mutable default is shared
    if values is None:
        values = []

    attempt = tries
    while True:
        # a fresh connection per attempt, always released in the finally below
        conn = sqlite3.connect('serialize.db', isolation_level=None, timeout=60)
        try:
            cur = conn.cursor()
            try:
                cur.execute(command, values)
                try:
                    return cur.fetchall()
                except Exception:
                    # the statement itself succeeded; only the fetch failed
                    return None
            finally:
                cur.close()
        except Exception:
            # Exception (not a bare except) so Ctrl-C and SystemExit still propagate
            print('bad db request')
            print(command)
            print(values)
            print(sys.exc_info())
            conn.rollback()
        finally:
            conn.close()

        if attempt >= max_retry:
            print('database interaction failed after', max_retry, 'retries')
            return -1
        # loop instead of recursing, so the outcome of a retry actually
        # reaches the caller (the recursive call's result used to be dropped)
        attempt += 1
# demo: round-trip numpy arrays through a sqlite BLOB column, then compare
# unpacking the raw rows against unpacking after a pandas bytes typecast
conn = sqlite3.connect('serialize.db', isolation_level=None)
cur = conn.cursor()
# uid is spelled INTEGER so it is a real integer primary key
# (the previous spelling 'INTERGER' was a typo)
query = "CREATE TABLE IF NOT EXISTS cereal (truth BLOB, uid INTEGER PRIMARY KEY);"
cur.execute(query)

# OR REPLACE keeps the script re-runnable: a plain INSERT violates the
# primary key on the second run and sends dbinteract through every retry
query = "INSERT OR REPLACE INTO cereal VALUES(?,?);"

# ends in zero
data = np.zeros(2, dtype=bool)
dbinteract(query, [pack_array(data), 0])

# doesnt end in zero
data = np.ones(3, dtype=bool)
dbinteract(query, [pack_array(data), 1])

print('\n===== unpack without a bytes typecast directly, works fine =====\n')
# ORDER BY makes the positional indexing below deterministic
query = 'SELECT uid, truth FROM cereal ORDER BY uid;'
cur.execute(query)
rawresults = cur.fetchall()
# every read is done; release the database handles before the demos run
cur.close()
conn.close()
print(unpack_qc(rawresults[0][1]))
print(unpack_qc(rawresults[1][1]))

print('\n===== unpack without a bytes typecast from df, works fine =====\n')
df = pandas.DataFrame(rawresults)
df.columns = ['uid', 'truth']
df[['truth']] = df[['truth']].apply(parse)
print(df['truth'])

print('\n===== unpack with a bytes typecast from df, fails =====\n')
# NOTE(review): this failure is the point of the demo; presumably the bytes
# typecast drops the trailing zero bytes of the first blob - TODO confirm
df = pandas.DataFrame(rawresults).astype('bytes')
df.columns = ['uid', 'truth']
df[['truth']] = df[['truth']].apply(parse)
print(df['truth'])
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment