Skip to content

Instantly share code, notes, and snippets.

@binary1230
Created August 2, 2019 21:39

Revisions

  1. binary1230 created this gist Aug 2, 2019.
    60 changes: 60 additions & 0 deletions gistfile1.txt
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,60 @@
    # all of this is terrible, don't use it.
    # purpose: take a contact list from an iPhone iCloud dump and compare it to a HubSpot export.
    # any iPhone contact whose name is not similar to a HubSpot contact's name is written to a CSV file that you can import into HubSpot.
    #
    # seriously, this is horrible I wrote it very quickly there are probably better ways to do this.

    import csv
    import sys

    import pandas as pd
    from fuzzywuzzy import fuzz

    # Two names count as the same person when their similarity score is
    # strictly greater than this value (fuzz.ratio scores range from 0 to 100).
    threshold_for_match = 90

    HUBSPOT_EXPORT_PATH = 'data/hubspot-crm-exports-all-contacts.xls'
    IPHONE_EXPORT_PATH = 'data/iphone export.xlsx'
    OUTPUT_CSV_PATH = 'c:/tmp/output.csv'


    def full_name(row):
        """Return ``'<First Name> <Last Name>'`` for *row*, outer whitespace stripped.

        *row* is anything indexable by the column names ``'First Name'`` and
        ``'Last Name'``. Cells are passed through ``str`` so that a name cell
        the spreadsheet reader parsed as a number does not raise ``TypeError``
        on concatenation.
        """
        return (str(row['First Name']) + ' ' + str(row['Last Name'])).strip()


    def build_contacts(df_iphone):
        """Index the iPhone rows by full name.

        Returns a dict mapping each full name to ``{'df': row, 'include': True}``.
        Rows that share a full name collapse into one entry (the last row wins).
        One progress dot is printed per row.
        """
        contacts = {}
        for _, row in df_iphone.iterrows():
            contacts[full_name(row)] = {'df': row, 'include': True}
            print('.', end='')
        return contacts


    def mark_matches(contacts, hubspot_names, threshold=threshold_for_match,
                     scorer=None):
        """Flag every contact whose name is similar to one of *hubspot_names*.

        Sets ``contact['include'] = False`` in place on each entry of
        *contacts* whose name scores strictly above *threshold* against a
        HubSpot name. Empty names on either side are never compared, so
        contacts with an empty name stay included.

        *scorer* is a callable ``(name_a, name_b) -> number``; it defaults to
        ``fuzz.ratio``, resolved at call time.
        """
        if scorer is None:
            scorer = fuzz.ratio

        for count, hubspot_name in enumerate(hubspot_names, start=1):
            if count % 10 == 0:
                print('.', end='', flush=True)

            # Nothing can match an empty name; skip the whole inner scan.
            if not hubspot_name:
                continue

            for name, contact in contacts.items():
                # Already-excluded contacts need no further scoring.
                if not name or not contact['include']:
                    continue
                if scorer(name, hubspot_name) > threshold:
                    # Deliberately no `break`: several iPhone contacts can be
                    # similar to the same HubSpot name and all must be excluded.
                    contact['include'] = False


    def rows_to_export(contacts):
        """Return the still-included contacts' rows as a list of plain dicts."""
        return [
            contact['df'].to_dict()
            for contact in contacts.values()
            if contact['include']
        ]


    def write_contacts_csv(rows, path):
        """Write *rows* (a list of dicts) to *path* as CSV with a header line.

        The header is taken from the keys of the first row. When *rows* is
        empty there is no header to derive, so a message is printed and no
        file is written.
        """
        if not rows:
            print('no contacts to export')
            return

        # utf-8-sig writes a BOM at the start of the file.
        with open(path, 'w', encoding='utf-8-sig', newline='') as output_file:
            dict_writer = csv.DictWriter(output_file, list(rows[0].keys()))
            dict_writer.writeheader()
            dict_writer.writerows(rows)


    def main():
        """Load both exports, cross-check the names and write the CSV."""
        df_hubspot = pd.read_excel(HUBSPOT_EXPORT_PATH, na_filter=False)
        df_iphone = pd.read_excel(IPHONE_EXPORT_PATH, na_filter=False)

        print('\npreping iphone data')
        contacts = build_contacts(df_iphone)

        print('\ncross-checking iphone data')
        hubspot_names = (full_name(row) for _, row in df_hubspot.iterrows())
        mark_matches(contacts, hubspot_names)

        print('\n')

        write_contacts_csv(rows_to_export(contacts), OUTPUT_CSV_PATH)


    if __name__ == '__main__':
        main()