Last active
July 27, 2016 10:18
-
-
Save trianglesis/b7dd7a7f975d661fcecf to your computer and use it in GitHub Desktop.
vkid-randomizer
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
'''
This script parses an HTML file in which VK ids are stored,
picks one of them at random and exports it to a CSV file.

Changelog:
1. Charset errors are now handled by substituting a replacement character.
2. Changed how the printed output is formatted.
2.1 Added random selection over the list of collected ids.
3. Added a CSV export of the single selected person.
'''
'''
Global variables used by this script:
    vk_read
'''
from html.parser import HTMLParser | |
import re, sys, random, csv | |
# Read the whole page up front. errors='replace' substitutes a replacement
# character for undecodable bytes instead of raising UnicodeDecodeError, as
# the module docstring describes. The `with` block closes the file itself.
with open('test.html', 'r', errors='replace') as content_file:
    read_data = content_file.read()

# VK ids collected by MyHTMLParser.handle_starttag.
vk_ids = []
# NOTE(review): not referenced anywhere in the visible script; kept in case
# other code relies on it.
vk_men = []
from html.parser import HTMLParser | |
import re, sys, random, csv | |
class MyHTMLParser(HTMLParser):
    """Collect VK ids from ``href`` attributes and export a random winner.

    For every start tag whose ``href`` value ends in ``/<word>``, the trailing
    word is treated as a VK id. Each new id is appended once to the
    module-level ``vk_ids`` list, and ``vk_winners.csv`` is rewritten with one
    id drawn at random from the ids collected so far. After the last matching
    tag the file therefore holds a winner drawn from the complete list.
    """

    # Trailing path segment of a link, e.g. "/id12345" -> "id12345".
    _ID_PATTERN = re.compile(r'/(\w+)$')

    def handle_starttag(self, tag, attrs):
        """Record the VK id carried by this tag, if any, and refresh the CSV.

        Args:
            tag: Tag name as passed by ``HTMLParser`` (unused; any tag with a
                matching ``href`` counts).
            attrs: List of ``(name, value)`` pairs passed by ``HTMLParser``.
        """
        global vk_read
        found = self._extract_id(attrs)
        if found is None:
            return
        # Kept for code that still reads the module-level global.
        vk_read = found
        # Compare the whole id (not its characters) so each id is stored once
        # and every participant has the same chance of being drawn.
        if found not in vk_ids:
            vk_ids.append(found)
        self._export_winner(random.choice(vk_ids))

    @classmethod
    def _extract_id(cls, attrs):
        """Return the id from the first matching ``href`` pair, or ``None``."""
        for name, value in attrs:
            # value is None for attributes written without a value.
            if name != 'href' or not value:
                continue
            match = cls._ID_PATTERN.search(value.strip())
            if match:
                return match.group(1)
        return None

    @staticmethod
    def _export_winner(winner):
        """Overwrite ``vk_winners.csv`` with the single chosen id."""
        # newline='' lets the csv module control line endings itself, which
        # avoids doubled line breaks on platforms that translate newlines.
        with open('vk_winners.csv', 'w', encoding='utf-8', newline='') as csvfile:
            csv.writer(csvfile, delimiter=' ').writerow([winner])
# Run the parser over the page that was read into read_data; start tags are
# delivered to MyHTMLParser.handle_starttag.
parser = MyHTMLParser()
parser.feed(read_data)
# close() forces processing of any data the parser is still buffering, as if
# the input were followed by an end-of-file mark.
parser.close()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This is really crap!
Left it here just for LULZ.