Last active
August 29, 2015 14:11
-
-
Save ZwodahS/f5464fd302f1fe79b4ee to your computer and use it in GitHub Desktop.
filters dictionary in python.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |
# SOFTWARE. | |
# | |
# Author : Eric (github.com/ZwodahS) | |
# License : Public Domain | |
""" | |
Provide 2 useful functions for Python dictionary manipulation.
dict_filter : filter a dictionary using simple include/exclude parameters.
dict_equal : recursively check whether 2 dictionaries are equal.
Run this file as a script (e.g. `python dict_utils.py`) to execute the test cases.
""" | |
import pprint | |
def _fields_dict_filter(fields): | |
''' | |
Separate a list of fields to a set of non-subdocumented fields and a dictionary | |
whose keys are the subdocuments and values are lists of their fields. | |
Example: | |
fields = ["email", "address", "address.coordinates"] | |
Return : {"email", "address"} { "address" : [ "coordinates" ] } | |
''' | |
output_fields = set() | |
output_dict = {} | |
if isinstance(fields, list): | |
for field in fields: | |
if '.' in field: | |
split = field.split(".") | |
if not split[0] in output_dict: | |
output_dict[split[0]] = [] | |
output_dict[split[0]] += [".".join(split[1:])] | |
else: | |
output_fields |= {field} | |
return output_fields, output_dict | |
def dict_filter(data, include=None, exclude=None, exclusive_include=False, preserve_empty_values=True):
    """
    Return a filtered copy of ``data`` according to include/exclude field paths.

    Field paths use "." to address nested dictionaries ("address.city").
    When a path crosses a list, the remainder of the path is applied to every
    item of that list. The input is not modified; nested values that are not
    filtered are shared with the input rather than copied.

    Args:
        data: The dictionary (or list of dictionaries) to filter. A value that
            is neither a dict nor a list is returned unchanged when
            ``exclusive_include`` is False and replaced by None when it is True.
        include: List of field paths to include.
        exclude: List of field paths to exclude.
        exclusive_include: If True, start from an empty result and keep only
            the included fields. If False, start from every key present in
            the data and remove the excluded ones.
        preserve_empty_values: If False, every falsy value is dropped from
            the result. That covers empty dictionaries and lists, but also
            None, 0, "" and False.

    Priority rules:
        . If the same field is both included and excluded, exclude wins.
        . The more specific path wins over the less specific one:
          exclude=["a"], include=["a.b"] keeps only "b" inside "a", while
          include=["a"], exclude=["a.b"] keeps "a" without its "b".

    Returns:
        A new dictionary (or list) holding the surviving fields. Keys appear
        in the same order as in the input.
    """
    def _keep(value):
        # Single place deciding whether a (possibly empty) value survives.
        return preserve_empty_values or bool(value)

    def _internal_filter(_data, _include, _exclude, _use_self_keys):
        """
        Filter one level of the structure.

        exclusive_include stays fixed for the whole call; _use_self_keys is
        the per-level switch saying whether this level starts from all of its
        own keys (True) or from nothing (False).
        """
        if isinstance(_data, list):
            # Lists are transparent: the same paths apply to every item.
            filtered = [
                _internal_filter(item, _include, _exclude, _use_self_keys)
                for item in _data
            ]
            return [item for item in filtered if _keep(item)]
        # isinstance (not an exact type check) so that dict subclasses such as
        # OrderedDict are filtered instead of being passed through untouched.
        if not isinstance(_data, dict):
            return _data if _use_self_keys else None

        include_fields, include_dict = _fields_dict_filter(_include)
        exclude_fields, exclude_dict = _fields_dict_filter(_exclude)

        wanted = set(include_fields)
        if _use_self_keys:
            wanted.update(_data)
        wanted -= exclude_fields  # exclude beats include at the same level

        out = {}
        # Walk the data itself (not the key set) so the output preserves the
        # input's key order.
        for key, value in _data.items():
            if key not in wanted:
                continue
            if key in exclude_dict and isinstance(value, (dict, list)):
                # The key is kept, but some of its sub-fields are excluded.
                value = _internal_filter(value, include_dict.get(key, []), exclude_dict[key], True)
            if _keep(value):
                out[key] = value

        # Keys not kept as a whole may still be partially included through a
        # more specific path ("a.b" while "a" itself is excluded or unlisted).
        for key, sub_include in include_dict.items():
            if key in _data and key not in out:
                value = _internal_filter(_data[key], sub_include, exclude_dict.get(key, []), False)
                if _keep(value):
                    out[key] = value
        return out

    return _internal_filter(data, include, exclude, not exclusive_include)
def dict_equal(d1, d2):
    """
    Recursively check whether two values (typically dictionaries) are equal.

    The comparison is strict about types: values whose exact types differ are
    never equal, so 1 and 1.0 do not match. Dictionaries must have identical
    key sets with recursively equal values. Lists must have the same length
    with recursively equal items in the same order. Anything else is compared
    with ``==``.

    Args:
        d1: First value to compare.
        d2: Second value to compare.

    Returns:
        True if d1 and d2 are equal under the rules above, False otherwise.
    """
    if type(d1) is not type(d2):
        return False
    if isinstance(d1, dict):
        # Compare key sets directly instead of concatenating keys():
        # dictionary views cannot be added together on Python 3.
        if set(d1) != set(d2):
            return False
        return all(dict_equal(d1[key], d2[key]) for key in d1)
    if isinstance(d1, list):
        if len(d1) != len(d2):
            return False
        return all(dict_equal(left, right) for left, right in zip(d1, d2))
    return d1 == d2
if __name__ == "__main__":
    # Fixture shared by every test case below.
    test_data = {
        'a': {'b': 1, 'c': 2, 'd': 3},
        'b': [1, 2, 3, 4],
        'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}],
    }
    # Each test case is (title, keyword arguments for dict_filter, expected result).
    TESTS = [
        (
            'thing always return everything',
            {},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclusive_include returns nothing',
            {'exclusive_include': True},
            {},
        ),
        (
            'preservation of empty list',
            {'exclude': ['c.a', 'c.b']},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{}, {}]},
        ),
        (
            'non-preservation of empty list',
            {'exclude': ['c.a', 'c.b'], 'preserve_empty_values': False},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4]},
        ),
        (
            'exclude single field in root',
            {'exclude': ['a']},
            {'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude nested field',
            {'exclude': ['a.b']},
            {'a': {'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude multiple nested fields',
            {'exclude': ['a.b', 'a.c']},
            {'a': {'d': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude nested field in list dictionary',
            {'exclude': ['c.b']},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude not affecting non-matching',
            {'exclude': ['b.a']},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'include inner field while excluding outer field',
            {'exclude': ['a'], 'include': ['a.b']},
            {'a': {'b': 1}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'include root when exclusive_include',
            {'exclusive_include': True, 'include': ['a']},
            {'a': {'b': 1, 'c': 2, 'd': 3}},
        ),
        (
            'include nested_fields when exclusive_include',
            {'exclusive_include': True, 'include': ['a.b']},
            {'a': {'b': 1}},
        ),
        (
            'include fields in list when exclusive_include',
            {'exclusive_include': True, 'include': ['c.a']},
            {'c': [{'a': {'b': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude inner field while including outer field',
            {'exclusive_include': True, 'include': ['a'], 'exclude': ['a.b']},
            {'a': {'c': 2, 'd': 3}},
        ),
    ]
    pp = pprint.PrettyPrinter(indent=4)
    for title, query, expectation in TESTS:
        result = dict_filter(test_data, **query)
        matched = dict_equal(result, expectation)
        status = "Success" if matched else "Failed"
        print("Test ({0}) Result : {1}".format(title, status))
        if matched:
            continue
        # On failure, show both sides so the difference can be inspected.
        print("Expectation : ")
        pp.pprint(expectation)
        print("Found : ")
        pp.pprint(result)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment