Created
May 13, 2022 12:47
-
-
Save pbelskiy/10d4d483b5a169c845fa76ec9576beb9 to your computer and use it in GitHub Desktop.
Bumble parser
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/local/bin/python3
import hashlib
import os
import random
import time

import requests
import selenium
from selenium import webdriver
from selenium.webdriver.common.by import By
# Cyrillic letters and their Latin approximations. Hard/soft signs have no
# Latin counterpart and are dropped. Capital letters always map to capital
# Latin output so the case of the original name is preserved.
_CYRILLIC_TO_LATIN = {
    'а': 'a', 'б': 'b', 'в': 'v', 'г': 'g', 'д': 'd', 'е': 'e', 'ё': 'yo',
    'ж': 'zh', 'з': 'z', 'и': 'i', 'й': 'i', 'к': 'k', 'л': 'l', 'м': 'm',
    'н': 'n', 'о': 'o', 'п': 'p', 'р': 'r', 'с': 's', 'т': 't', 'у': 'u',
    'ф': 'f', 'х': 'h', 'ц': 'c', 'ч': 'ch', 'ш': 'sh', 'щ': 'sch',
    'ъ': '', 'ы': 'y', 'ь': '', 'э': 'e', 'ю': 'u', 'я': 'ya',
    'А': 'A', 'Б': 'B', 'В': 'V', 'Г': 'G', 'Д': 'D', 'Е': 'E', 'Ё': 'YO',
    'Ж': 'ZH', 'З': 'Z', 'И': 'I', 'Й': 'I', 'К': 'K', 'Л': 'L', 'М': 'M',
    'Н': 'N', 'О': 'O', 'П': 'P', 'Р': 'R', 'С': 'S', 'Т': 'T', 'У': 'U',
    'Ф': 'F', 'Х': 'H', 'Ц': 'C', 'Ч': 'CH', 'Ш': 'SH', 'Щ': 'SCH',
    'Ъ': '', 'Ы': 'Y', 'Ь': '', 'Э': 'E', 'Ю': 'U', 'Я': 'YA',
    # Ukrainian letters: lower case maps to lower case, upper to upper.
    'ґ': 'g', 'ї': 'i', 'є': 'e', 'Ґ': 'G', 'Ї': 'I', 'Є': 'E',
}

# Punctuation that is removed outright from the result.
_DROPPED_CHARS = ',?~!@#$%^&*()-=+:;<>\'"\\/№[]{}—'

# Translation table built once at import time; str.translate applies the
# whole mapping in a single pass over the string.
_TRANSLIT_TABLE = str.maketrans({
    **_CYRILLIC_TO_LATIN,
    ' ': '_',
    **dict.fromkeys(_DROPPED_CHARS, ''),
})


def transliterate(name):
    """Return *name* with Cyrillic letters replaced by Latin approximations.

    Spaces become underscores and the punctuation listed in
    ``_DROPPED_CHARS`` is removed. Characters that are not in the table
    (Latin letters, digits, dots, ...) pass through unchanged.

    >>> transliterate('Мария Ивановна')
    'Mariya_Ivanovna'
    """
    return name.translate(_TRANSLIT_TABLE)
url = 'https://bumble.com/app'
PROJECT_ROOT = os.path.abspath(os.path.dirname(__file__))
DRIVER_BIN = os.path.join(PROJECT_ROOT, 'chromedriver')
PROFILE_DIR = os.path.join(PROJECT_ROOT, 'bumble')

# Upper bound, in seconds, for a single photo download, so that one stuck
# request cannot block the scraping loop forever.
IMAGE_TIMEOUT = 30


def _fetch_image(src):
    """Return the bytes served at *src*, or None if they cannot be fetched.

    A missing URL, a network error, or a non-200 response all yield None so
    that the caller can simply skip the photo.
    """
    if not src:
        return None
    try:
        response = requests.get(src, timeout=IMAGE_TIMEOUT)
    except requests.RequestException as error:
        print(f'failed to download {src}: {error}')
        return None
    if response.status_code != 200:
        return None
    return response.content


def _save_profile(name, age, desc, image_sources):
    """Download the photos of one profile and store them with its text.

    The profile directory is created lazily and named after the SHA-256 of
    the first successfully downloaded photo plus the transliterated name and
    the age. Each photo is saved as ``<sha256>.jpg``; *desc*, when non-empty,
    is written to ``about.txt``.

    Returns the profile directory, or None when no photo could be fetched
    (in which case nothing is written).
    """
    profile_path = None
    for src in image_sources:
        content = _fetch_image(src)
        if content is None:
            continue
        hexdigest = hashlib.sha256(content).hexdigest()
        if profile_path is None:
            profile_path = os.path.join(
                PROFILE_DIR, f'{hexdigest}_{transliterate(name)}_{age}')
            os.makedirs(profile_path, exist_ok=True)
        with open(os.path.join(profile_path, f'{hexdigest}.jpg'), 'wb') as f:
            f.write(content)

    # Without a profile directory there is nowhere to put the description.
    if desc and profile_path is not None:
        # Explicit UTF-8: descriptions may hold text that the platform's
        # default encoding cannot represent.
        about_path = os.path.join(profile_path, 'about.txt')
        with open(about_path, 'w', encoding='utf-8') as f:
            f.write(desc)
    return profile_path


os.makedirs(PROFILE_DIR, exist_ok=True)

# NOTE(review): the executable_path= keyword is deprecated in Selenium 4 in
# favour of a Service object - confirm against the installed Selenium version.
driver = webdriver.Chrome(executable_path=DRIVER_BIN)

try:
    driver.get(url)
    # Block until the operator has logged in manually in the opened browser.
    input('enter login')

    while True:
        time.sleep(1)
        try:
            name = driver.find_element(
                By.CLASS_NAME, 'encounters-story-profile__name').text.strip()
            if not name:
                print('no name')
                continue

            age = driver.find_element(
                By.CLASS_NAME, 'encounters-story-profile__age')
            age = age.text.strip(',').strip()

            abouts = driver.find_elements(
                By.CLASS_NAME, 'encounters-story-about__text')
            desc = ''.join(
                about.get_attribute('innerHTML') + '\n' for about in abouts)

            images = driver.find_elements(
                By.CLASS_NAME, 'media-box__picture-image')
            if not images:
                input('there is no images')
                continue

            # Read every URL before downloading anything, so an element going
            # stale is detected before any file has been written.
            sources = [image.get_attribute('src') for image in images]
            print(_save_profile(name, age, desc, sources))

            # Short randomised pause before moving on to the next profile.
            time.sleep(1 + random.random())
            driver.find_element(
                By.CLASS_NAME, 'encounters-action--dislike').click()
        except selenium.common.exceptions.StaleElementReferenceException:
            # The page re-rendered while it was being read; retry the card.
            continue
finally:
    # Always close the browser, including on Ctrl-C or an unexpected error.
    driver.quit()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment