Skip to content

Instantly share code, notes, and snippets.

@caiofcm
Created March 18, 2022 18:59

Revisions

  1. caiofcm created this gist Mar 18, 2022.
    71 changes: 71 additions & 0 deletions parse_wpd_to_pandas.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,71 @@
    import json
    import typing
    from pathlib import Path

    import click
    import numpy as np
    import pandas as pd


    def get_dataset_as_dict(data_loaded: dict) -> dict:
        """Map each dataset name to an array of its point values.

        Args:
            data_loaded: Parsed project JSON (presumably a WebPlotDigitizer
                export -- confirm against the producer). It must contain a
                ``'datasetColl'`` list whose entries have ``'name'`` and
                ``'data'`` keys; every item of ``'data'`` must have a
                ``'value'`` entry.

        Returns:
            A dict from dataset name to ``np.ndarray`` built from the
            ``'value'`` entries, one row per point. A dataset without points
            maps to an empty array of shape ``(0, 2)``. When two datasets
            share a name, the later one replaces the earlier one.

        Raises:
            KeyError: If one of the expected keys is missing.
        """
        dict_of_data_sets = {}
        for data_set in data_loaded['datasetColl']:
            values = [datum['value'] for datum in data_set['data']]
            # np.array([]) is 1-D, which breaks the [:, 0] / [:, 1] column
            # slicing done when the arrays are turned into a DataFrame; keep
            # empty datasets two-dimensional instead.
            xy_par = np.array(values) if values else np.empty((0, 2))
            dict_of_data_sets[data_set['name']] = xy_par

        return dict_of_data_sets


    def get_dataset_as_dataframe_from_dict(dict_of_data_sets: dict) -> pd.DataFrame:
        """Stack named point arrays into a single long-format DataFrame.

        Args:
            dict_of_data_sets: Dict from dataset name to a 2-D array whose
                column 0 is used as ``x`` and column 1 as ``y``.

        Returns:
            A DataFrame with columns ``'x'``, ``'y'`` and ``'name'`` and a
            fresh 0..n-1 index, holding the rows of every dataset in dict
            order. Datasets without points contribute no rows; if there are
            no rows at all, an empty DataFrame with those three columns is
            returned.
        """
        columns = ['x', 'y', 'name']

        list_of_dfs = []
        for name, xy_matrix in dict_of_data_sets.items():
            # len() rather than truthiness: the truth value of an array is
            # ambiguous. An empty array may be 1-D, so it must be skipped
            # before the column slicing below.
            if len(xy_matrix) == 0:
                continue
            list_of_dfs.append(pd.DataFrame({
                'x': xy_matrix[:, 0],
                'y': xy_matrix[:, 1],
                'name': name,
            }))

        # pd.concat raises ValueError on an empty list.
        if not list_of_dfs:
            return pd.DataFrame(columns=columns)

        return pd.concat(list_of_dfs, ignore_index=True)


    def get_dataset_as_daframe_from_path(fpath: typing.Union[str, Path]) -> pd.DataFrame:
        """Load a project JSON file and return its datasets as one DataFrame.

        The misspelling in the function name ("daframe") is kept on purpose so
        that existing callers keep working.

        Args:
            fpath: Path of the JSON file to read.

        Returns:
            The result of ``get_dataset_as_dataframe_from_dict`` applied to
            the datasets extracted by ``get_dataset_as_dict``.

        Raises:
            OSError: If the file cannot be opened.
            json.JSONDecodeError: If the file is not valid JSON.
        """
        # Read as UTF-8 explicitly instead of relying on the platform default
        # encoding, which is not UTF-8 everywhere (e.g. on Windows).
        with open(fpath, 'r', encoding='utf-8') as fp:
            data_loaded = json.load(fp)

        dict_of_data_sets = get_dataset_as_dict(data_loaded)
        return get_dataset_as_dataframe_from_dict(dict_of_data_sets)


    def main(fpath_json, fpath_out):
        """Load the datasets from *fpath_json*, print a preview and return them.

        Args:
            fpath_json: Path of the project JSON file to read.
            fpath_out: Accepted for interface compatibility but currently
                unused; nothing is written to it.
                NOTE(review): decide on an output format before wiring it up.

        Returns:
            The DataFrame produced by ``get_dataset_as_daframe_from_path``.
        """
        # Use the caller's path; the previous hard-coded file name made the
        # fpath_json argument a no-op.
        df_dataset = get_dataset_as_daframe_from_path(fpath_json)

        print(df_dataset.head())

        return df_dataset

    @click.command()
    @click.argument('fpath_json', type=click.Path(exists=True))
    @click.argument('fpath_out', type=click.Path(exists=False))
    def main_click(fpath_json, fpath_out):
        """Read the datasets in FPATH_JSON and print the first rows.

        FPATH_OUT is accepted but currently unused; no file is written.
        """
        # NOTE(review): fpath_out is required on the command line yet never
        # used below -- confirm the intended output format before adding a
        # write step.
        df_dataset = get_dataset_as_daframe_from_path(fpath_json)

        print(df_dataset.head())

        return df_dataset


    if __name__ == "__main__":
        # Run the click command-line entry point when executed as a script.
        main_click()