#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Fetch a PDF for every bracketed entry listed in ``references.txt``.

Each line of the references file that starts with ``[`` is used as a web
search query (prefixed with ``pdf``).  The search results are tried in order
until one of them turns out to be a PDF, which is then saved in the current
directory as ``<title>.pdf``.
"""
import google
import requests
import magic

# File holding one reference per line.
REFERENCES_FILE = "references.txt"

# Typographic opening quote that may precede a title.  Its length is taken
# with len() instead of being hard-coded: the literal is three UTF-8 bytes in
# a Python 2 byte string and a single character in a Python 3 string.
OPENING_QUOTE = "“"

# Seconds to wait on an unresponsive server before giving up on a URL.
REQUEST_TIMEOUT = 30


def get_pdf(url, title):
    """Download ``url`` and save it as ``<title>.pdf`` if it is a PDF.

    Args:
        url: Address of the candidate document.
        title: Base name (without extension) of the file to write.

    Returns:
        True if the response was identified as a PDF and written to disk,
        False if the request failed or the content is not a PDF.
    """
    print(url)
    try:
        pdf = requests.get(url, timeout=REQUEST_TIMEOUT)
    except requests.exceptions.RequestException as err:
        # One unreachable search hit must not abort the whole run; report it
        # and let the caller move on to the next candidate.
        print("skipping %s: %s" % (url, err))
        return False

    # Trust the content sniffed by libmagic rather than the URL or headers.
    with magic.Magic() as m:
        res = m.id_buffer(pdf.content)
    if 'pdf' not in res.lower():
        return False

    # Binary mode: the payload is raw bytes and must be written untouched.
    with open(title + ".pdf", 'wb') as f:
        f.write(pdf.content)
    return True


def _extract_title(line):
    """Return the first comma-separated field of ``line`` without a '.'.

    Surrounding whitespace and a leading typographic opening quote are
    removed.  Returns None when every field contains a '.'.
    """
    for elem in line.split(','):
        if '.' in elem:
            continue
        title = elem.strip()
        if title.startswith(OPENING_QUOTE):
            title = title[len(OPENING_QUOTE):]
        return title
    return None


with open(REFERENCES_FILE) as refs:
    for line in refs:
        line = line.strip()
        # Only lines starting with '[' are treated as references; this also
        # skips blank lines.
        if not line.startswith('['):
            continue

        title = _extract_title(line)
        if title is None:
            # Without a title there is no file name to save under; skipping
            # avoids reusing (and overwriting) the previous reference's file.
            print("no title found, skipping: " + line)
            continue

        for res in google.search("pdf " + line):
            if get_pdf(res, title):
                break