Created
May 21, 2015 18:06
-
-
Save fbwright/4bda53e0363d73cad076 to your computer and use it in GitHub Desktop.
Manga downloader
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
#by fbWright | |
from __future__ import print_function, division | |
import sys, bs4, requests, os, os.path, json | |
from urllib.parse import urlparse, urljoin | |
from unidecode import unidecode | |
# Python 2 has no text-returning ``input``; alias it to ``raw_input`` there.
if sys.version_info[0] < 3:
    input = raw_input

# Root directory for downloaded manga: ~/Pictures/Manga
BASEDIR = os.path.join(os.path.expanduser("~"), "Pictures", "Manga")

# Number of bytes requested per chunk when streaming an image to disk.
CHUNK_SIZE = 16 * 1024
def download_image(source, destination, timeout=30):
    """Stream the resource at ``source`` into the file ``destination``.

    source      -- URL of the image to fetch.
    destination -- path of the file to write (opened in binary mode).
    timeout     -- seconds to wait for the server before giving up; passed
                   straight to ``requests.get``.

    Returns the number of chunks written.  The result is 0 when the server
    answers with a status other than 200, in which case no file is created.
    """
    r = requests.get(source, stream=True, timeout=timeout)
    i = 0
    try:
        if r.status_code == 200:
            with open(destination, "wb") as f:
                for chunk in r.iter_content(CHUNK_SIZE):
                    f.write(chunk)
                    i += 1
    finally:
        # With stream=True the connection stays open until the body is fully
        # consumed or the response is closed, so always close it explicitly.
        r.close()
    return i
def download_chapter(address, folder, chapter):
    """Download every page image of one chapter into ``folder``.

    The page at ``address`` is fetched and scanned for lines of the form
    ``lstImages.push("<url>");``; each one carries a single image URL.
    Images are saved as ``NNNN<ext>`` where ``NNNN`` is the zero-padded page
    index.  A file named ``counter`` inside ``folder`` stores how many pages
    have been completed, so an interrupted run resumes at the next page.

    address -- URL of the chapter page.
    folder  -- directory receiving the images; created when missing.
    chapter -- label used only in the progress message.
    """
    print("Downloading chapter %s"%chapter)
    if not os.path.exists(folder):
        # makedirs also creates missing parent directories.
        os.makedirs(folder)
    counter_file = os.path.join(folder, "counter")
    # ``counter`` is the number of pages already downloaded.
    counter = 0
    if os.path.exists(counter_file):
        with open(counter_file, "r") as handle:
            try:
                counter = int(handle.read())
            except ValueError:
                # Empty or corrupt counter file: start from the first page.
                counter = 0
    page = requests.get(address)
    stripped = (line.strip() for line in page.text.splitlines())
    # [16:-3] drops the leading `lstImages.push("` and the trailing `");`.
    images = [line[16:-3] for line in stripped
              if line.startswith("lstImages.push")]
    for i, image in enumerate(images):
        # Strictly-less-than, so page 0 is fetched on a fresh run.
        if i < counter:
            continue
        # splitext copes with names containing several dots or no extension.
        ext = os.path.splitext(urlparse(image).path)[1]
        basename = "%04d%s" % (i, ext)
        download_image(image, os.path.join(folder, basename))
        # Record progress only after the page has been written.
        counter = i + 1
        with open(counter_file, "w") as handle:
            handle.write(str(counter))
        print("Downloaded %s." % basename)
def download_manga(name):
    """Download all chapters of the manga ``name`` from kissmanga.com.

    Files go to ``BASEDIR/<name>``: one sub-directory per chapter (named by
    its zero-padded position in reading order) plus a ``config.json`` holding
    the manga description and the chapter list.

    name -- the manga's identifier as it appears in its kissmanga.com URL.

    Raises ValueError when the fetched page contains no chapter list.
    """
    address = "http://kissmanga.com/Manga/%s" % name
    directory = os.path.join(BASEDIR, name)
    config_file = os.path.join(directory, "config.json")
    manga = {"info":{}, "chapters":{}}
    if not os.path.exists(directory):
        # makedirs also creates BASEDIR itself on the first run.
        os.makedirs(directory)
    # Pulls the data from the server
    response = requests.get(address)
    # Name the parser explicitly so the result does not depend on which
    # optional parser libraries happen to be installed.
    soup = bs4.BeautifulSoup(response.text, "html.parser")
    if os.path.exists(config_file):
        # Reuse the description stored by an earlier run.
        with open(config_file, "r") as handle:
            manga = json.load(handle)
    else:
        # Parse the info about the manga
        info = soup.find(class_="barContent")
        if info is not None:
            manga["info"] = unidecode(info.text)
    # Find all the chapters.  This is done on every run so that the download
    # loop below always has a chapter list, even when config.json exists.
    chapter_list = soup.find(class_="chapterList")
    if chapter_list is None:
        raise ValueError("No chapter list found at %s" % address)
    # The first anchor is skipped and the rest reversed, as the original
    # code did (presumably the page lists newest chapters first; confirm).
    anchors = chapter_list.find_all("a")[1:][::-1]
    chapters = [(anchor.text.strip(), urljoin(address, anchor["href"]))
                for anchor in anchors]
    manga["chapters"] = chapters
    # Download the chapters
    for index, (chapter_name, chapter_address) in enumerate(chapters):
        folder = os.path.join(directory, "%03d" % index)
        download_chapter(chapter_address, folder, index)
    # Save the data
    with open(config_file, "w") as handle:
        json.dump(manga, handle, indent=2)


# Sample chapter URL; nothing in the visible code reads this name.
address = "http://kissmanga.com/Manga/Amnesian/3?id=39771"
def print_help():
    """Print the program description and its usage line to standard output."""
    help_lines = (
        "Manga downloader",
        "Simple python program to download manga from the kissmanga.com",
        "website, using requests and BeautifulSoup.",
        "Usage",
        "manga get [NAME]",
    )
    print("\n".join(help_lines))
if __name__=="__main__":
    # A command and its argument are both required.
    if len(sys.argv) < 3:
        print_help()
        sys.exit()
    command = sys.argv[1]
    param = sys.argv[2]
    if command.lower() == "get":
        download_manga(param)
    else:
        # Report an unrecognised command instead of exiting silently.
        print("Unknown command: %s" % command, file=sys.stderr)
        print_help()
        sys.exit(1)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment