Created
June 8, 2018 04:31
-
-
Save evorios/701063b2c7a65f371517082530a53cd6 to your computer and use it in GitHub Desktop.
Script for downloading photos by tag or albums from Yandex.Photos (Яндекс.Фотки)
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python
# Notes on activating a conda environment:
# https://conda.io/docs/user-guide/tasks/manage-environments.html#activating-an-environment
# Example of a user's tag page: https://fotki.yandex.ru/users/onna63/tags/
# Typical session (cmder console with conda):
#   cmder
#   conda
#   conda env list
#   activate root
#   python --version
#   python dump.py -h
#   python dump.py -g кошки Onna63    (download user Onna63's photos tagged "кошки", i.e. "cats")
import argparse
import calendar
import json
import os
import re
import time
import urllib.request
from urllib.parse import quote
# Yandex.Fotki API endpoint templates. Each one requests JSON output and at
# most 100 entries per page.
#   user_url  -- album list of a user:        format(user)
#   album_url -- photos of one album:         format(user, album_id)
#   tag_url   -- photos carrying a given tag: format(user, url_quoted_tag)
# The doubled "&&" in user_url was a typo that produced an empty query
# parameter; it is now a single "&".
user_url = "http://api-fotki.yandex.ru/api/users/{}/albums/?limit=100&format=json"
album_url = "http://api-fotki.yandex.ru/api/users/{}/album/{}/photos/?limit=100&format=json"
tag_url = "http://api-fotki.yandex.ru/api/users/{}/tag/{}/photos/?limit=100&format=json"

# Possible values of the ``use_date`` option: which photo timestamp is used as
# the modification time of the saved file.
CREATED = 1
PUBLISHED = 2
def _photo_mtime(image, use_date):
    """Return the modification time (epoch seconds) to stamp on a saved photo."""
    if use_date == CREATED:
        stamp = image.get("created")
    elif use_date == PUBLISHED:
        stamp = image.get("published")
    else:
        stamp = None
    if not stamp:
        # No date requested, or the entry lacks it: fall back to "now".
        return time.time()
    # The literal trailing "Z" marks the timestamp as UTC, so it must be
    # converted with timegm(); mktime() would interpret it as local time.
    return calendar.timegm(time.strptime(stamp, "%Y-%m-%dT%H:%M:%SZ"))


def _download(url):
    """Return the raw bytes served at *url*, closing the connection afterwards."""
    with urllib.request.urlopen(url) as response:
        return response.read()


def _candidate_filenames(album_dir, image, use_title):
    """Return the file paths to try for *image*, most preferred first."""
    names = []
    title = image.get("title") or ""
    if use_title and title.lower() not in ("", ".jpg"):
        # A title containing a path separator could point outside album_dir,
        # so such titles are not used as file names.
        has_separator = os.sep in title or (
            os.altsep is not None and os.altsep in title
        )
        if not has_separator:
            if not title.lower().endswith(".jpg"):
                title += ".jpg"
            names.append(title)
    # The name built from the trailing digits of the entry id is always
    # available as the fallback.
    names.append(re.search(r"\d+$", image["id"]).group() + ".jpg")
    return [os.path.join(album_dir, name) for name in names]


def _store(filename, data, mtime):
    """Write *data* to *filename* and set its modification time to *mtime*."""
    try:
        with open(filename, mode="wb") as out:
            out.write(data)
    except OSError:
        # Do not leave a truncated file behind: a later run would treat it
        # as "already exists" and never download the photo.
        try:
            os.remove(filename)
        except OSError:
            pass
        raise
    os.utime(filename, (time.time(), mtime))


def _save_photo(album_dir, image, use_title, use_date):
    """Download one photo entry into *album_dir* unless it is already there."""
    mtime = _photo_mtime(image, use_date)
    data = None
    for filename in _candidate_filenames(album_dir, image, use_title):
        if os.path.exists(filename):
            print('"{}" already exists. Skipped.'.format(filename))
            return
        try:
            # Fetch before opening the target so that a failed download
            # never creates an empty file.
            if data is None:
                data = _download(image["img"]["orig"]["href"])
            _store(filename, data, mtime)
            return
        except OSError:
            # Covers both network errors (URLError is an OSError) and file
            # system errors; try the next candidate name.
            continue
    print('"{}" cannot be saved. Skipped.'.format(filename))


def grab(user_id, album_id, dest, use_title, use_date, tag, next=None):
    """Download every photo of an album (or of a tag) page by page.

    Args:
        user_id: Yandex.Fotki user name.
        album_id: Numeric album id as a string, or None to download by tag.
        dest: Directory under which a folder named after the album title
            is created.
        use_title: If true, prefer the photo title as the file name and fall
            back to the numeric photo id when the title is unusable.
        use_date: CREATED, PUBLISHED or None; selects the timestamp used as
            the file's modification time (current time when None or when the
            entry lacks the timestamp).
        tag: Tag to download when album_id is None.
        next: URL of a page to start from instead of the first page. When
            given, the "Downloading album" banner is not printed. The name
            shadows the builtin but is kept for backward compatibility.

    Returns:
        None. Stops silently when a page has no "entries" key.
    """
    announce = next is None
    if next:
        url = next
    elif album_id is None:
        url = tag_url.format(user_id, quote(tag))
    else:
        url = album_url.format(user_id, album_id)

    # Follow the "next" links iteratively rather than recursing once per page.
    while url:
        print('url: {}'.format(url))
        album = json.loads(_download(url).decode("utf-8"))
        if "entries" not in album:
            return
        album_dir = os.path.join(dest, album["title"])
        os.makedirs(album_dir, exist_ok=True)
        if announce:
            print('Downloading album "{}" (id: {}) (tag: {}) (number: {})...'.format(album["title"], album_id, tag, len(album["entries"])))
            announce = False
        for image in album["entries"]:
            _save_photo(album_dir, image, use_title, use_date)
        # A page without a "links" section or without a "next" link is the
        # last one.
        url = (album.get("links") or {}).get("next")
def main():
    """Parse the command line and download the requested albums or tag.

    With a non-empty --tag, all photos of the user carrying that tag are
    downloaded. Otherwise the user's albums are listed and each non-empty
    album is downloaded if it was selected with --albums (every album when
    the list is empty) or, when --albums is omitted, if the user answers
    "y"/"Y" at the prompt.
    """
    parser = argparse.ArgumentParser(description="Downloads albums from Yandex.Fotki. Skips files that already exist.")
    parser.add_argument("user")
    parser.add_argument("-a", "--albums", nargs="*", metavar="ID", help="list of album ids to proceed (download all if empty, prompt for every album if the argument is omitted)")
    parser.add_argument("-g", "--tag", default="", metavar="TAG", help="tag")
    parser.add_argument("-d", "--dest", default="", metavar="DIR", help="output directory")
    parser.add_argument("-t", "--use-title", action="store_true", help="use title as file name (if possible)")
    date_group = parser.add_mutually_exclusive_group()
    date_group.add_argument("-c", "--use-cdate", dest="use_date", action="store_const", const=CREATED, help="use creation date as modification date (if available)")
    date_group.add_argument("-p", "--use-pdate", dest="use_date", action="store_const", const=PUBLISHED, help="use publishing date as modification date")
    args = parser.parse_args()

    if args.tag != "":
        grab(args.user, None, args.dest, args.use_title, args.use_date, args.tag)
        return

    url = user_url.format(args.user)
    # The listing is limited to 100 albums per request, so keep following the
    # "next" link. NOTE(review): assumes the album listing exposes
    # links.next the same way the photo listing does; if it does not, the
    # loop simply ends after the first page.
    while url:
        with urllib.request.urlopen(url) as response:
            user = json.loads(response.read().decode("utf-8"))
        for album in user.get("entries", []):
            if album["imageCount"] == 0:
                continue
            # The album id is the trailing run of digits of the entry id.
            album_id = re.search(r"\d+$", album["id"]).group()
            if args.albums is None:
                # --albums omitted: ask about every album.
                wanted = input('Download album "{}" (id: {})? '.format(album["title"], album_id)) in ["y", "Y"]
            else:
                # Empty list means "all albums".
                wanted = args.albums == [] or album_id in args.albums
            if wanted:
                grab(args.user, album_id, args.dest, args.use_title, args.use_date, args.tag)
        url = (user.get("links") or {}).get("next")


if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment