binary1230 · August 2, 2019 21:39 · Aug 2, 2019
diff --git a/gistfile1.txt b/gistfile1.txt
@@ -0,0 +1,60 @@
+# all of this is terrible, don't use it. 
+# Purpose: take a contact list from an iPhone iCloud dump and compare it to a Hubspot export.
+# Any iPhone contact whose name has no close match in Hubspot is written to a CSV file to import into Hubspot.
+#
+# Seriously, this is horrible: I wrote it very quickly, and there are probably better ways to do this.
+
+import pandas as pd
+from fuzzywuzzy import fuzz
+import sys
+
+threshold_for_match = 90  # a name pair scoring strictly above this counts as a match
+# na_filter=False: presumably so blank cells load as '' instead of NaN -- confirm in pandas docs
+df_hubspot = pd.read_excel('data/hubspot-crm-exports-all-contacts.xls', na_filter=False)
+df_iphone = pd.read_excel('data/iphone export.xlsx', na_filter=False)
+# iPhone contacts keyed by full name; each value holds the sheet row and an 'include' flag
+contacts = {}
+
+print('\npreping iphone data')  # index each iPhone row under its stripped "First Last" name
+for _, entry in df_iphone.iterrows():  # NOTE: a repeated full name replaces the earlier row
+    key = ' '.join((entry['First Name'], entry['Last Name'])).strip()
+    contacts[key] = dict(df=entry, include=True)  # every contact starts out as "to import"
+    print('.', end='')
+
+print('\ncross-checking iphone data')
+# Clear 'include' on every iPhone contact whose name closely matches a Hubspot name.
+for c, (_, row) in enumerate(df_hubspot.iterrows(), start=1):
+    fullname = (row['First Name'] + ' ' + row['Last Name']).strip()
+
+    # Progress indicator: one dot per ten Hubspot rows, flushed so it shows up live.
+    if c % 10 == 0:
+        print('.', end='')
+        sys.stdout.flush()
+
+    # A Hubspot row without a name can match nothing, so skip the scan entirely.
+    if not fullname:
+        continue
+    for name, contact in contacts.items():
+        # Unnamed contacts are never compared; excluded ones need no rescoring.
+        if not name or not contact['include']:
+            continue
+        # No `break` on a hit: several iPhone spellings may resemble this one
+        # Hubspot name, and stopping early would leave the later ones exported.
+        if fuzz.ratio(name, fullname) > threshold_for_match:
+            contact['include'] = False
+
+print('\n')
+
+# Rows of the iPhone contacts that no Hubspot name matched, as plain dicts.
+dfs = [contact['df'].to_dict() for contact in contacts.values() if contact['include']]
+
+import csv
+
+# Header comes from the iPhone sheet instead of dfs[0]: dfs is empty when every
+# contact matched, and indexing it raised IndexError before anything was written.
+keys = list(df_iphone.columns)
+with open('c:/tmp/output.csv', 'w', encoding='utf-8-sig', newline='') as output_file:
+    dict_writer = csv.DictWriter(output_file, keys)
+    dict_writer.writeheader()
+    dict_writer.writerows(dfs)
+