Skip to content

Instantly share code, notes, and snippets.

@chrisking
Created August 18, 2022 22:08
Show Gist options
  • Save chrisking/654eccd9e6bb56f4d195baec47e9725f to your computer and use it in GitHub Desktop.
Save chrisking/654eccd9e6bb56f4d195baec47e9725f to your computer and use it in GitHub Desktop.
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
from icrawler.builtin import BingImageCrawler
import random
import pprint
# Download a few Bing images for each of a random selection of celebrities.
#
# Names are read from 'list_of_celebrities.txt' (one per line); images for
# each selected name are stored under 'data/images/<name>'.

# Set to False to crawl without the type/license filters defined below.
mitFilter = True

# Search filters passed to the crawler: photos only, with a license that
# allows commercial use and modification.
# NOTE(review): the original comment listed the `type` options as
# "photo, face, clipart, linedrawing, animated" -- confirm which values the
# Bing crawler actually accepts against the icrawler documentation.
filters = dict(type='photo', license='commercial,modify')

# Read the names as UTF-8 (names may contain non-ASCII characters) and skip
# blank lines, which would otherwise become an empty search keyword and an
# empty directory name.
with open('list_of_celebrities.txt', encoding='utf-8') as reader:
    celebs = [line.strip() for line in reader if line.strip()]

# Request up to 3 images for each of 20 randomly chosen celebrities,
# i.e. at most 60 images in total.
howmany_of_each = 3
howmany_celebs = 20

# random.sample raises ValueError when asked for more items than the
# population holds, so never ask for more names than were loaded.
search_keyword = random.sample(celebs, min(howmany_celebs, len(celebs)))
pprint.pprint(search_keyword)

# `n` is a 1-based progress counter; it stays 0 when nothing was selected.
n = 0
for n, keyword in enumerate(search_keyword, start=1):
    print(n)
    crawler = BingImageCrawler(
        parser_threads=6,
        downloader_threads=6,
        storage={'root_dir': 'data/images/{}'.format(keyword)},
    )
    # Build the crawl arguments once; the filters are only added on request
    # so the unfiltered call is identical to the original one.
    crawl_options = dict(
        keyword=keyword,
        max_num=howmany_of_each,
        min_size=(500, 500),
    )
    if mitFilter:
        crawl_options['filters'] = filters
    crawler.crawl(**crawl_options)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment