Last active
May 3, 2021 05:27
-
-
Save shreyasms17/789b838691eacf3faec23eaf05797ca4 to your computer and use it in GitHub Desktop.
AutoFlatten unnest_dict
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def unnest_dict(self, json, cur_path):
    '''
    Description:
    This function unnests the dictionaries in the json schema recursively
    and maps the hierarchical path to the field to the column name when it encounters a leaf node.
    Results are recorded on the instance rather than returned:
    self.all_fields maps the dotted path of a field to its name, and
    self.cols_to_explode collects the dotted path of every array field.

    :param json: [type: dict or list] metadata about one field, or a list of such field dicts
    :param cur_path: [type: str] contains hierarchical path to that field, each parent separated by a '.'
    :return: None
    '''
    # Leaf detection is delegated to self.is_leaf (defined outside this block).
    if self.is_leaf(json):
        self.all_fields[f"{cur_path}.{json['name']}"] = json['name']
        return

    # A list of sibling fields: every entry shares the same parent path.
    if isinstance(json, list):
        for field in json:
            self.unnest_dict(field, cur_path)
        return

    # Anything that is neither a list nor a dict is ignored.
    if not isinstance(json, dict):
        return

    field_type = json['type']

    if isinstance(field_type, str):
        # A non-leaf field whose type is a plain string: extend the path
        # unless the type is 'struct', then recurse on the type string itself.
        if field_type != 'struct':
            cur_path = f"{cur_path}.{json['name']}"
        self.unnest_dict(field_type, cur_path)
        return

    type_name = field_type['type']

    if type_name == 'array':
        cur_path = f"{cur_path}.{json['name']}"
        element_type = field_type['elementType']
        # Every array column is marked for explosion, whatever its element type.
        self.cols_to_explode.add(cur_path)
        if isinstance(element_type, dict):
            # NOTE(review): a dict elementType without a 'fields' key raises
            # KeyError here -- confirm whether such schemas (e.g. an array
            # nested directly in an array) can reach this method.
            self.unnest_dict(element_type['fields'], cur_path)
        else:
            # Array of a non-dict element type: the array column itself is
            # recorded as a field.
            self.all_fields[f"{cur_path}"] = json['name']
    elif type_name == 'struct':
        cur_path = f"{cur_path}.{json['name']}"
        self.unnest_dict(field_type['fields'], cur_path)
    # NOTE(review): any other nested type name is silently skipped -- confirm
    # that this is intended.
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment