# Created: August 2, 2019 21:39
# Revisions: 1 - binary1230 created this gist on Aug 2, 2019.
# all of this is terrible, don't use it.
# purpose: take a contact list from an IPHONE icloud dump, compare to a Hubspot export.
# if any names aren't similar, print out a CSV file with names you should import into Hubspot.
#
# seriously, this is horrible I wrote it very quickly there are probably better ways to do this.

import csv
import sys

import pandas as pd

# An iPhone contact whose fuzzy score against any HubSpot name is strictly
# greater than this value is treated as already present in HubSpot.
threshold_for_match = 90

HUBSPOT_EXPORT_PATH = 'data/hubspot-crm-exports-all-contacts.xls'
IPHONE_EXPORT_PATH = 'data/iphone export.xlsx'
OUTPUT_CSV_PATH = 'c:/tmp/output.csv'


def full_name(row):
    """Return the stripped "First Last" name built from a spreadsheet row.

    ``row`` is anything indexable by the 'First Name' and 'Last Name' keys.
    Values are formatted rather than concatenated so a non-string cell
    (e.g. a number typed into a name column) does not raise ``TypeError``.
    """
    return f"{row['First Name']} {row['Last Name']}".strip()


def index_contacts(df_iphone):
    """Index the iPhone rows by full name, all initially marked for export.

    Returns a dict mapping full name -> {'df': row, 'include': True}.
    Rows that share the same full name (including rows with no name at all,
    keyed by '') overwrite each other, so only the last one is kept.
    Prints one progress dot per row.
    """
    contacts = {}
    for _, row in df_iphone.iterrows():
        contacts[full_name(row)] = {'df': row, 'include': True}
        print('.', end='')
    return contacts


def exclude_known_contacts(contacts, known_names,
                           threshold=threshold_for_match, scorer=None):
    """Clear the 'include' flag of every contact that resembles a known name.

    contacts:    dict produced by ``index_contacts``; mutated in place.
    known_names: iterable of full names already present in HubSpot.
    threshold:   a score strictly greater than this counts as a match.
    scorer:      callable ``(name_a, name_b) -> score``; defaults to
                 ``fuzzywuzzy.fuzz.ratio``.

    Every contact similar to a known name is excluded, not just the first
    one found, so near-duplicate iPhone entries are not exported as new.
    Blank names on either side are never compared. Prints a progress dot
    for every tenth known name.
    """
    if scorer is None:
        # Imported lazily so the other helpers stay usable without the
        # fuzzy-matching package installed.
        from fuzzywuzzy import fuzz
        scorer = fuzz.ratio

    for count, known in enumerate(known_names, start=1):
        if count % 10 == 0:
            print('.', end='')
            sys.stdout.flush()
        if not known:
            continue
        for name, contact in contacts.items():
            # Skip blank names and contacts already ruled out: re-scoring
            # them cannot change the result.
            if not name or not contact['include']:
                continue
            if scorer(name, known) > threshold:
                contact['include'] = False


def write_contacts_csv(rows, fieldnames, path):
    """Write ``rows`` (a list of dicts) to ``path`` as CSV with a header.

    The header comes from ``fieldnames`` rather than from the first row, so
    an empty ``rows`` list produces a header-only file instead of failing.
    The file is written as UTF-8 with a BOM ('utf-8-sig').
    """
    with open(path, 'w', encoding='utf-8-sig', newline='') as output_file:
        dict_writer = csv.DictWriter(output_file, fieldnames)
        dict_writer.writeheader()
        dict_writer.writerows(rows)


def main():
    """Export the iPhone contacts that have no similar name in HubSpot."""
    # na_filter=False keeps blank cells as '' instead of NaN.
    df_hubspot = pd.read_excel(HUBSPOT_EXPORT_PATH, na_filter=False)
    df_iphone = pd.read_excel(IPHONE_EXPORT_PATH, na_filter=False)

    print('\npreping iphone data')
    contacts = index_contacts(df_iphone)

    print('\ncross-checking iphone data')
    hubspot_names = (full_name(row) for _, row in df_hubspot.iterrows())
    exclude_known_contacts(contacts, hubspot_names)
    print('\n')

    rows = [
        contact['df'].to_dict()
        for contact in contacts.values()
        if contact['include']
    ]
    write_contacts_csv(rows, list(df_iphone.columns), OUTPUT_CSV_PATH)


if __name__ == '__main__':
    main()