Skip to content

Instantly share code, notes, and snippets.

@bkatiemills
Created September 29, 2019 16:43
Show Gist options
  • Save bkatiemills/82eab3650e75d90733c2b06a79ddbf19 to your computer and use it in GitHub Desktop.
Save bkatiemills/82eab3650e75d90733c2b06a79ddbf19 to your computer and use it in GitHub Desktop.
fun with byte serialization
import io, pickle, sqlite3, sys, pandas
import numpy as np
def parse(results):
    '''
    unpack every serialized entry in results;
    results must expose .apply (in this file it is a DataFrame column
    handed over by DataFrame.apply); each element goes through unpack_qc.
    '''
    unpacked = results.apply(unpack_qc)
    return unpacked
def unpack_qc(value):
    '''
    rebuild a numpy array from its serialized bytes;
    the raw value and its type are echoed to stdout before decoding.
    '''
    print(value, type(value))
    # np.load wants a file-like object, so wrap the bytes in an in-memory stream
    stream = io.BytesIO(value)
    return np.load(stream)
def pack_array(arr):
    '''
    serialize a numpy array into a binary blob suitable for a sqlite column;
    anything whose type is not exactly np.ndarray produces an empty blob.
    '''
    buffer = io.BytesIO()
    is_plain_ndarray = type(arr) is np.ndarray
    if is_plain_ndarray:
        np.save(buffer, arr)
    # getvalue() hands back everything written, regardless of stream position
    return sqlite3.Binary(buffer.getvalue())
def dbinteract(command, values=None, tries=0, *, database='serialize.db', max_retry=100):
    '''
    execute the given SQL command;
    catch errors and retry a maximum number of times;

    command:   SQL statement, optionally containing ? placeholders.
    values:    parameters bound to the placeholders (defaults to none).
    tries:     number of attempts already used up; retries stop once this
               reaches max_retry.
    database:  path handed to sqlite3.connect.
    max_retry: how many retries are allowed after the first failed attempt.

    returns the fetched rows as a list (empty for statements that produce
    no rows), None if fetching the rows raised an sqlite3 error, or -1 once
    every attempt has failed.
    '''
    # avoid a shared mutable default; an empty list keeps the logged output unchanged
    params = [] if values is None else values

    while True:
        # a fresh connection per attempt, so a failed attempt never leaks state
        conn = sqlite3.connect(database, isolation_level=None, timeout=60)
        cur = conn.cursor()
        try:
            cur.execute(command, params)
            try:
                result = cur.fetchall()
            except sqlite3.Error:
                result = None
            return result
        except Exception:
            # Exception rather than a bare except, so Ctrl-C and SystemExit
            # are not swallowed and retried
            print('bad db request')
            print(command)
            print(params)
            print(sys.exc_info())
            conn.rollback()
        finally:
            cur.close()
            conn.close()

        if tries >= max_retry:
            print('database interaction failed after', max_retry, 'retries')
            return -1
        # looping (instead of an unreturned recursive call) lets the outcome
        # of a retry actually reach the caller
        tries += 1
# Repro script: store two serialized boolean arrays in sqlite, then read them
# back three different ways to show which path breaks deserialization.
conn = sqlite3.connect('serialize.db', isolation_level=None)
cur = conn.cursor()
# uid is spelled INTEGER so that it is a genuine INTEGER PRIMARY KEY
query = "CREATE TABLE IF NOT EXISTS cereal (truth BLOB, uid INTEGER PRIMARY KEY);"
cur.execute(query)

# OR REPLACE keeps a rerun against an existing serialize.db from colliding on
# uid (each collision would otherwise go through dbinteract's full retry loop)
query = "INSERT OR REPLACE INTO cereal VALUES(?,?);"

# ends in zero: an all-False array serializes to a blob whose last bytes are 0x00
data = np.zeros(2, dtype=bool)
dbinteract(query, [pack_array(data), 0])

# doesn't end in zero: an all-True array serializes to a blob ending in 0x01
data = np.ones(3, dtype=bool)
dbinteract(query, [pack_array(data), 1])

print('\n===== unpack without a bytes typecast directly, works fine =====\n')
# ORDER BY makes the positional indexing below deterministic
query = 'SELECT uid, truth FROM cereal ORDER BY uid;'
cur.execute(query)
rawresults = cur.fetchall()
print(unpack_qc(rawresults[0][1]))
print(unpack_qc(rawresults[1][1]))

print('\n===== unpack without a bytes typecast from df, works fine =====\n')
df = pandas.DataFrame(rawresults)
df.columns = ['uid', 'truth']
df[['truth']] = df[['truth']].apply(parse)
print(df['truth'])

print('\n===== unpack with a bytes typecast from df, fails =====\n')
# NOTE(review): this section is expected to fail; presumably the bytes cast
# drops the trailing zero bytes of the first blob -- confirm against NumPy's
# fixed-width bytes dtype behaviour.
df = pandas.DataFrame(rawresults).astype('bytes')
df.columns = ['uid', 'truth']
df[['truth']] = df[['truth']].apply(parse)
print(df['truth'])
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment