Skip to content

Instantly share code, notes, and snippets.

@ZwodahS
Last active August 29, 2015 14:11
Show Gist options
  • Save ZwodahS/f5464fd302f1fe79b4ee to your computer and use it in GitHub Desktop.
Save ZwodahS/f5464fd302f1fe79b4ee to your computer and use it in GitHub Desktop.
filters dictionary in python.
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# Author : Eric (github.com/ZwodahS)
# License : Public Domain
"""
Provide two useful functions for Python dictionary manipulation.
dict_filter : filter a dictionary using simple include/exclude parameters.
dict_equal : recursively check whether two dictionaries are equal.
Run this module as a script (e.g. ``python dict_utils.py``) to execute the test cases.
"""
import pprint
def _fields_dict_filter(fields):
    """
    Split dotted field paths into top-level names and per-parent sub-paths.

    A field without a dot is collected as a plain top-level name.  A field
    containing a dot is split at its FIRST dot: the part before the dot is
    the parent key and the remainder (which may itself still contain dots)
    is appended to that parent's list of sub-paths.

    Args:
        fields: A list, tuple, set or frozenset of field-path strings.  Any
            other value (including None and a bare string) is treated as
            "no fields".

    Returns:
        A tuple ``(plain_fields, nested_fields)`` where ``plain_fields`` is
        the set of undotted names and ``nested_fields`` maps each parent key
        to the list of its sub-paths, in iteration order of ``fields``.

    Example:
        fields = ["email", "address", "address.coordinates"]
        Return : ({"email", "address"}, {"address": ["coordinates"]})
    """
    output_fields = set()
    output_dict = {}
    # A bare string is deliberately not accepted: iterating it would yield
    # single characters rather than field names.
    if not isinstance(fields, (list, tuple, set, frozenset)):
        return output_fields, output_dict
    for field in fields:
        if "." in field:
            # Only the first dot separates parent from sub-path; deeper
            # levels are resolved when the sub-path is parsed again.
            parent, _, remainder = field.partition(".")
            output_dict.setdefault(parent, []).append(remainder)
        else:
            output_fields.add(field)
    return output_fields, output_dict
def dict_filter(data, include=None, exclude=None, exclusive_include=False, preserve_empty_values=True):
    """
    Return a new dictionary holding the parts of ``data`` that match the criteria.

    Fields are strings.  A dot addresses a key inside a nested dictionary
    (e.g. ``"address.coordinates"``); when the value at the parent key is a
    list, the rest of the path is applied to every element of that list.
    Values that are not themselves filtered are placed in the result as-is
    (they are shared with ``data``, not copied).

    Args:
        data: The dictionary data (a list of dictionaries is also accepted).
        include: The fields (list) to include.
        exclude: The fields (list) to exclude.
        exclusive_include: If True, only the included fields are returned.
        preserve_empty_values: If False, every falsy value (empty dict or
            list, but also ``0``, ``""``, ``None`` and ``False``) is dropped
            from the result.

    Returns:
        The filtered dictionary (or list, when ``data`` is a list).

    The order of operations depends on ``exclusive_include``:

    If exclusive_include is True
        start with an empty set of keys,
        . add every undotted field of ``include``,
        . remove every undotted field of ``exclude``; a field present in both
          is excluded.
    If exclusive_include is False
        start with every key of ``data``, then add and remove as above.

    In both modes the more specific (dotted) entry takes priority:
        . a kept key that has dotted ``exclude`` entries, and whose value is
          a dict or list, is filtered recursively starting from all of its
          own keys;
        . a key that was not kept but has dotted ``include`` entries is added
          back containing only the listed sub-fields.
    """
    def _internal_filter(_data, _include, _exclude, _use_self_keys):
        """
        Filter a single level of ``_data``.

        ``exclusive_include`` is fixed for the whole call; ``_use_self_keys``
        is the per-level switch: True starts from all keys of ``_data``,
        False starts from none.
        """
        if isinstance(_data, list):
            # The same field lists apply to every element of a list.
            filtered = [_internal_filter(item, _include, _exclude, _use_self_keys) for item in _data]
            if preserve_empty_values:
                return filtered
            return [item for item in filtered if item]
        if not isinstance(_data, dict):
            # A scalar reached through a dotted path: keep it only when this
            # level is in "start with everything" mode.
            return _data if _use_self_keys else None

        include_fields, include_dict = _fields_dict_filter(_include)
        exclude_fields, exclude_dict = _fields_dict_filter(_exclude)
        own_keys = set(_data) if _use_self_keys else set()
        # Subtracting last makes exclude win over include at the same level.
        keys = (own_keys | include_fields) - exclude_fields

        out = {}
        for k in keys:
            if k not in _data:
                continue
            value = _data[k]
            if k in exclude_dict and isinstance(value, (list, dict)):
                # Kept key with nested excludes: filter inside it, starting
                # from all of its own keys.
                value = _internal_filter(value, include_dict.get(k, []), exclude_dict[k], True)
            if preserve_empty_values or value:
                out[k] = value

        for k, sub_include in include_dict.items():
            if k in _data and k not in out:
                # Key was not kept above but has nested includes: add it
                # back restricted to the listed sub-fields.
                value = _internal_filter(_data[k], sub_include, exclude_dict.get(k, []), False)
                if preserve_empty_values or value:
                    out[k] = value
        return out

    return _internal_filter(data, include, exclude, not exclusive_include)
def dict_equal(d1, d2):
    """
    Recursively check whether two values (typically dictionaries) are equal.

    The comparison is type-strict: the two values must have exactly the same
    type at every level, so ``1`` and ``1.0`` (or ``True`` and ``1``) are
    NOT considered equal even though ``==`` would accept them.

    Args:
        d1: First value; usually a dict, but lists and scalars are accepted.
        d2: Second value.

    Returns:
        True if both values have the same type and, recursively, the same
        keys / elements / value; False otherwise.
    """
    if type(d1) is not type(d2):
        return False
    if isinstance(d1, dict):
        # Compare key sets directly; concatenating ``keys()`` results only
        # works on Python 2, where they are lists.
        if set(d1) != set(d2):
            return False
        if not all(dict_equal(d1[k], d2[k]) for k in d1):
            return False
        # Fall through to the native comparison below so that dict
        # subclasses with their own ``__eq__`` keep their semantics.
    elif isinstance(d1, list):
        if len(d1) != len(d2):
            return False
        return all(dict_equal(a, b) for a, b in zip(d1, d2))
    return d1 == d2
if __name__ == "__main__":
    # Self-test: run dict_filter against a fixed fixture and report, per
    # case, whether the result matches the expectation (via dict_equal).
    test_data = {
        'a': {'b': 1, 'c': 2, 'd': 3},
        'b': [1, 2, 3, 4],
        'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}],
    }
    # test case : (title, query, expectation)
    # ``query`` holds the keyword arguments passed to dict_filter.
    TESTS = [
        (
            'thing always return everything',
            {},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclusive_include returns nothing',
            {'exclusive_include': True},
            {},
        ),
        (
            'preservation of empty list',
            {'exclude': ['c.a', 'c.b']},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{}, {}]},
        ),
        (
            'non-preservation of empty list',
            {'exclude': ['c.a', 'c.b'], 'preserve_empty_values': False},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4]},
        ),
        (
            'exclude single field in root',
            {'exclude': ['a']},
            {'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude nested field',
            {'exclude': ['a.b']},
            {'a': {'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude multiple nested fields',
            {'exclude': ['a.b', 'a.c']},
            {'a': {'d': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude nested field in list dictionary',
            {'exclude': ['c.b']},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude not affecting non-matching',
            {'exclude': ['b.a']},
            {'a': {'b': 1, 'c': 2, 'd': 3}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'include inner field while excluding outer field',
            {'exclude': ['a'], 'include': ['a.b']},
            {'a': {'b': 1}, 'b': [1, 2, 3, 4], 'c': [{'a': {'b': 1}, 'b': {'a': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'include root when exclusive_include',
            {'exclusive_include': True, 'include': ['a']},
            {'a': {'b': 1, 'c': 2, 'd': 3}},
        ),
        (
            'include nested_fields when exclusive_include',
            {'exclusive_include': True, 'include': ['a.b']},
            {'a': {'b': 1}},
        ),
        (
            'include fields in list when exclusive_include',
            {'exclusive_include': True, 'include': ['c.a']},
            {'c': [{'a': {'b': 1}}, {'a': {'b': 2, 'c': 3}}]},
        ),
        (
            'exclude inner field while including outer field',
            {'exclusive_include': True, 'include': ['a'], 'exclude': ['a.b']},
            {'a': {'c': 2, 'd': 3}},
        ),
    ]
    pp = pprint.PrettyPrinter(indent=4)
    for title, query, expectation in TESTS:
        result = dict_filter(test_data, **query)
        matched = dict_equal(result, expectation)
        print("Test ({0}) Result : {1}".format(title, "Success" if matched else "Failed"))
        if not matched:
            # Show both sides so a failing case can be diagnosed directly.
            print("Expectation : ")
            pp.pprint(expectation)
            print("Found : ")
            pp.pprint(result)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment