Created
September 21, 2015 11:21
-
-
Save ZwodahS/d627dd3210fcf4d3a613 to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
""" | |
data_matrix.py | |
Author: Eric | |
github: ZwodahS | |
data_matrix is a simple modules that helps you count on a N-dimension matrix. | |
terminology: | |
data: a single entry in the matrix | |
tag: a tag is a single label given to a data | |
tags is like the row/column in a 2dimension matrix | |
""" | |
# Value types a tag may declare under its "type" key.
INT = "INT"
STRING = "STR"
BOOL = "BOOL"

# Range kinds a tag may declare under its "range" key.
RANGE = "RANGE"  # only for int
DISTINCT = "DISTINCT"
class DataMatrix(object):
    """Count entries over an N-dimensional matrix whose axes are named tags.

    Every entry must carry a value for every defined tag. Counts are kept in
    a nested dict with one level per tag (in ``_tag_order``); the innermost
    dict holds the running total under the key ``"count"``.

    Note: ``RANGE`` is accepted as a range kind for ``INT`` tags, but this
    class implements no range-based query; all lookups are exact matches.
    """

    def __init__(self, tags):
        """Create an empty matrix.

        tags : defines what each data point are and their valid values
            {
                "<name>" : { "type": (INT | STR | BOOL), "range": (RANGE | DISTINCT) }
            }

        Raises ValueError if any tag declares an unknown type or a range
        kind that is not valid for its type.
        """
        self._init_tags(tags)

    def _init_tags(self, tags):
        """Validate every tag definition, then reset all internal state."""
        for tag in tags.values():
            self._assert_type(tag.get("type"))
            self._assert_range(tag.get("range"), tag.get("type"))
        self.tags = tags
        self._datas = []
        self._matrix = {}
        # just fix on an order, doesn't really matter for now
        self._tag_order = list(tags)

    def _assert_type(self, data_type):
        """Raise ValueError unless data_type is INT, STRING or BOOL."""
        if data_type not in (INT, STRING, BOOL):
            raise ValueError("Invalid data type {0}".format(data_type))

    def _assert_range(self, data_range, data_type):
        """Raise ValueError unless data_range is allowed for data_type."""
        # Built per call (not at class level) so the class body itself does
        # not depend on the module constants at definition time.
        valid_by_type = {
            INT: (RANGE, DISTINCT),
            STRING: (DISTINCT,),
            BOOL: (DISTINCT,),
        }
        # An unknown type has no valid ranges, so any range is rejected.
        if data_range not in valid_by_type.get(data_type, ()):
            raise ValueError(
                "Invalid tag range '{0}' for type '{1}'".format(data_range, data_type)
            )

    def _clean_value_for_tag(self, tag_name, value):
        """Return value normalised to the declared type of tag_name.

        INT values go through int(), BOOL values through bool(); STRING
        values must already be ``str`` and are returned unchanged.

        Raises ValueError if the tag is not defined or the value cannot be
        interpreted as the declared type.
        """
        tag = self.tags.get(tag_name)
        if tag is None:
            raise ValueError("Invalid name for tag {0}".format(tag_name))

        tag_type = tag["type"]
        if tag_type == INT:
            try:
                return int(value)
            except Exception:
                raise ValueError(
                    "Invalid value for tag {0} : {1}".format(tag_name, value)
                )
        if tag_type == STRING:
            if not isinstance(value, str):
                raise ValueError(
                    "Invalid value for tag {0} : {1}".format(tag_name, value)
                )
            return value
        if tag_type == BOOL:
            try:
                # bool() can still raise for objects with a custom truth test.
                return bool(value)
            except Exception:
                raise ValueError(
                    "Invalid value for tag {0} : {1}".format(tag_name, value)
                )
        return value

    def set_data(self, data, **tags):
        """Record one entry and increase the count of its cell.

        If data is None, only the count is increased; otherwise the pair
        ``(data, normalised_tags)`` is also appended to ``_datas``.

        Raises ValueError if an unknown tag is given, if not every defined
        tag is given, or if a value does not fit its tag's declared type.
        Nothing is stored when the call is rejected.
        """
        # Validate everything first so a rejected call leaves no trace.
        for tag_name in tags:
            if tag_name not in self.tags:
                raise ValueError("tag {0} is not in defined tag".format(tag_name))
        if len(tags) != len(self.tags):
            raise ValueError("All data must be tag to all defined tags")

        # Normalise so that e.g. age=7 and age="7" share the same cell.
        cleaned = {
            name: self._clean_value_for_tag(name, tags[name])
            for name in self._tag_order
        }

        if data is not None:
            self._datas.append((data, cleaned))

        # Walk (creating as needed) one nesting level per tag.
        node = self._matrix
        for name in self._tag_order:
            node = node.setdefault(cleaned[name], {})
        node["count"] = node.get("count", 0) + 1

    def get_count(self, **tags):
        """Return how many entries match every given tag value.

        Tags that are omitted (or passed as None) act as wildcards, so
        calling with no arguments returns the total number of entries.

        Raises ValueError for a tag name that is not defined (a misspelled
        filter would otherwise silently match everything) or for a value
        that does not fit the tag's declared type.
        """
        for tag_name in tags:
            if tag_name not in self.tags:
                raise ValueError("tag {0} is not in defined tag".format(tag_name))

        tag_query = []
        for name in self._tag_order:
            wanted = tags.get(name)
            if wanted is not None:
                # Same normalisation as set_data, so lookups hit the same keys.
                wanted = self._clean_value_for_tag(name, wanted)
            tag_query.append(wanted)
        return self._get_count(self._matrix, tag_query)

    def _get_count(self, current_data, tags_list):
        """Sum the counts below current_data that match tags_list.

        tags_list holds one entry per remaining nesting level; None matches
        every key at that level. Returns 0 when current_data is None or when
        nothing matches.
        """
        # Frontier of sub-dicts still matching the query, advanced one
        # level per tag instead of recursing on sliced copies of the list.
        frontier = [] if current_data is None else [current_data]
        for wanted in tags_list:
            next_frontier = []
            for node in frontier:
                if wanted is None:
                    next_frontier.extend(node.values())
                else:
                    child = node.get(wanted)
                    if child is not None:
                        next_frontier.append(child)
            frontier = next_frontier
        return sum(node.get("count", 0) for node in frontier)
if __name__ == "__main__":
    # Self-test: fill a five-tag matrix with 99 entries and check the
    # marginal and two-tag counts.
    matrix = DataMatrix(tags={
        "school": {"type": STRING, "range": DISTINCT},
        "age": {"type": INT, "range": RANGE},
        "gender": {"type": STRING, "range": DISTINCT},
        "town": {"type": STRING, "range": DISTINCT},
        "vegetarian": {"type": BOOL, "range": DISTINCT},
    })

    # Each row: (repeat, school, age, gender, town, vegetarian).
    # The row is inserted `repeat` times with data=None (count only).
    samples = [
        (6, "School_A", 7, "F", "Town_A", False),
        (6, "School_A", 7, "F", "Town_B", False),
        (6, "School_A", 7, "M", "Town_B", True),
        (6, "School_A", 8, "F", "Town_A", True),
        (6, "School_B", 8, "F", "Town_A", True),
        (6, "School_A", 8, "F", "Town_B", False),
        (6, "School_A", 8, "M", "Town_A", True),
        (6, "School_B", 8, "M", "Town_A", False),
        (6, "School_B", 8, "M", "Town_A", True),
        (6, "School_B", 7, "F", "Town_B", True),
        (6, "School_A", 7, "M", "Town_B", False),
        (6, "School_B", 7, "M", "Town_B", False),
        (6, "School_A", 7, "F", "Town_B", True),
        (6, "School_B", 7, "F", "Town_A", False),
        (6, "School_B", 8, "M", "Town_B", True),
        (6, "School_B", 8, "F", "Town_B", False),
        (3, "School_A", 8, "M", "Town_B", True),
    ]
    for repeat, school, age, gender, town, vegetarian in samples:
        for _ in range(repeat):
            matrix.set_data(None, school=school, age=age, gender=gender,
                            town=town, vegetarian=vegetarian)

    # Single-tag (marginal) counts.
    assert matrix.get_count(school="School_A") == 51
    assert matrix.get_count(school="School_B") == 48
    assert matrix.get_count(gender="M") == 45
    assert matrix.get_count(gender="F") == 54
    assert matrix.get_count(age=7) == 48
    assert matrix.get_count(age=8) == 51
    assert matrix.get_count(vegetarian=True) == 51
    assert matrix.get_count(vegetarian=False) == 48
    assert matrix.get_count(town="Town_A") == 42
    assert matrix.get_count(town="Town_B") == 57

    # Two-tag counts.
    assert matrix.get_count(school="School_A", town="Town_A") == 18
    assert matrix.get_count(school="School_A", town="Town_B") == 33
    assert matrix.get_count(school="School_B", town="Town_A") == 24
    assert matrix.get_count(school="School_B", town="Town_B") == 24
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment