Created
January 31, 2016 03:47
-
-
Save Javran/80ad70ee139ddf4364eb to your computer and use it in GitHub Desktop.
downloader for curgentleman
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python3 | |
import os | |
import re | |
import sys | |
import json | |
import asyncio | |
import aiohttp | |
import urllib.request | |
import logging | |
from itertools import count | |
# Shared limiter: every HTTP request in getimg() runs inside "async with sem",
# so at most five requests are in flight at any time.
sem = asyncio.Semaphore(value=5)

# Number of bytes requested per read while streaming an image body.
chunk_size = 1024 * 100  # 100kb

# Request headers; the __main__ block assigns the cookie header here
# before any request is made.
headers = None
async def getimg(title, showkey, url):
    """Download one gallery image into the ``title`` directory.

    The image URL is obtained by POSTing a ``showpage`` request to the
    site's ``api.php`` and extracting the ``src`` attribute from the HTML
    fragment in the JSON reply.  The image is then streamed to
    ``<title>/<page>.jpg``.  Both network steps are retried indefinitely,
    one second apart.

    Progress is written to stdout: ``<page> `` when the file already
    exists, ``<page>? `` / ``<page>?? `` after a failed attempt and
    ``<page>✔ `` once the image has been saved.

    Args:
        title: Directory the image is written to.
        showkey: Value sent as ``showkey`` in the API request.
        url: Image page URL whose last two path segments are
            ``<imgkey>/<gid>-<page>``.

    Raises:
        AssertionError: 'api error' if the API reply contains no image
            link, 'image type' if the link does not end in ``.jpg``.
    """
    imgkey, gidpage = url.split('/')[-2:]
    gid, page = gidpage.split('-', 1)
    filename = title + '/' + str(page) + '.jpg'
    if os.path.exists(filename):
        # Already saved by a previous run; just report it.
        sys.stdout.write(str(page) + ' ')
        sys.stdout.flush()
        return
    data = dict(method='showpage', gid=gid, page=page,
                imgkey=imgkey, showkey=showkey)
    url = None
    # NOTE(review): aiohttp.Timeout / aiohttp.post / aiohttp.get are
    # module-level helpers of an older aiohttp API -- confirm they exist in
    # the installed aiohttp version.
    while True:
        try:
            async with sem:
                with aiohttp.Timeout(5.0):
                    async with aiohttp.post('http://exhentai.org/api.php',
                                            data=json.dumps(data),
                                            headers=headers) as r:
                        j = await r.json()
            for v in j.values():
                if isinstance(v, str) and v.startswith('<a onclick'):
                    url = re.search(r'src="([^"]*)"', v).group(1)
            break
        except Exception:
            # Not a bare "except:", so Ctrl-C / SystemExit can still stop
            # the otherwise endless retry loop.
            sys.stdout.write(str(page) + '? ')
            sys.stdout.flush()
            await asyncio.sleep(1.0)
    # Check for a missing link *before* touching it, and raise explicitly so
    # the checks survive "python -O" (which strips assert statements).
    if not url:
        raise AssertionError('api error')
    if url.split('.')[-1] != 'jpg':
        raise AssertionError('image type')
    # Stream into a temporary name and rename on success, so an interrupted
    # download never leaves a truncated "<page>.jpg" that the exists-check
    # above would later mistake for a finished file.
    partial = filename + '.part'
    fails = 0
    while True:
        try:
            async with sem:
                with aiohttp.Timeout(5.0):
                    async with aiohttp.get(url) as r:
                        with open(partial, 'wb') as f:
                            while True:
                                chunk = await r.content.read(chunk_size)
                                if not chunk:
                                    break
                                f.write(chunk)
            os.replace(partial, filename)
            break
        except Exception:
            fails += 1
            if fails == 5:
                # After five failures, swap a numeric host (optionally with
                # a port) in the URL for a fixed fallback address.
                url = re.sub(r'/((\d{1,3}\.){3}\d{1,3}:?(\d{1,5}?))/',
                             '/37.48.81.80/', url)
            sys.stdout.write(str(page) + ('? ' if fails <= 5 else '?? '))
            sys.stdout.flush()
            await asyncio.sleep(1.0)
    sys.stdout.write(str(page) + '✔ ')
    sys.stdout.flush()
def get_download_tasks(url):
    """Collect every image page of a gallery and build its download jobs.

    Index pages ``<url>/?p=0``, ``<url>/?p=1``, ... are fetched until a page
    yields no image links or ends with the same link as the pages collected
    so far.  The first image page is then fetched to read the ``showkey``
    and the ``<title>``; the title (spaces and ``/`` replaced by ``_``)
    names the output directory, which is created if missing.

    Args:
        url: Gallery URL without a trailing slash.

    Returns:
        A list with one un-awaited ``getimg`` coroutine per image page.

    Raises:
        RuntimeError: If no image page link is found at all.
        AttributeError: If the first image page lacks a ``showkey`` or a
            ``<title>``.
    """
    # ``headers`` is only filled in by the __main__ block; Request() cannot
    # take None, so fall back to an empty mapping.
    request_headers = headers or {}
    sys.stdout.write('Pages:\n')
    sys.stdout.flush()
    # get all pages
    pages = []
    for p in count(0):
        req = urllib.request.Request(url + '/?p=' + str(p),
                                     headers=request_headers)
        with urllib.request.urlopen(req) as resp:
            text = resp.read().decode('utf-8')
        sys.stdout.write(str(p) + '✔ ')
        sys.stdout.flush()
        pgs = re.findall(r'http://exhentai.org/s/[^"]*', text)
        # Stop on a page without links (previously an endless loop or an
        # IndexError) or when this page ends with the link already collected
        # last, i.e. it repeats what we have.
        if not pgs or (pages and pgs[-1] == pages[-1]):
            break
        pages.extend(pgs)
    print()
    if not pages:
        raise RuntimeError('no image pages found at ' + url)
    # get showkey
    req = urllib.request.Request(pages[0], headers=request_headers)
    with urllib.request.urlopen(req) as resp:
        text = resp.read().decode('utf-8')
    showkey = re.search(r'showkey="([^"]*)"', text).group(1)
    title = re.search(r'<title>([^<]*)<', text).group(1)
    # getimg() joins the title and the file name with '/', so a slash inside
    # the title would point into a directory that does not exist.
    title = title.replace(' ', '_').replace('/', '_')
    os.makedirs(title, exist_ok=True)
    return [getimg(title, showkey, p) for p in pages]
if __name__ == '__main__':
    # Script entry point: read the cookie header from cookies.txt, take the
    # gallery URL from the first command-line argument (or use the built-in
    # sample), then run all download jobs on the event loop.

    # no "decode JSON with unexpected mimetype" warnings
    logging.getLogger('aiohttp.client').setLevel(logging.ERROR)
    with open('cookies.txt') as cookie_file:
        headers = {'Cookie': cookie_file.read().strip()}
    if len(sys.argv) == 1:
        # url = 'http://exhentai.org/g/811536/5a696a817a'
        url = 'http://exhentai.org/g/831911/c4e250474c'
    else:
        url = sys.argv[1]
    tasks = get_download_tasks(url.rstrip('/'))
    print('Tasks:', len(tasks))
    loop = asyncio.get_event_loop()
    # asyncio.wait() no longer accepts bare coroutine objects on current
    # Python versions, so wrap each one in a Task first.
    futures = [asyncio.ensure_future(task, loop=loop) for task in tasks]
    # asyncio.wait() raises ValueError on an empty collection.
    if futures:
        loop.run_until_complete(asyncio.wait(futures))
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment