Skip to content

Instantly share code, notes, and snippets.

@Emmunaf
Created May 3, 2020 18:53
Show Gist options
  • Save Emmunaf/85849bd07ee2f7726a96381a1dc1bd06 to your computer and use it in GitHub Desktop.
Save Emmunaf/85849bd07ee2f7726a96381a1dc1bd06 to your computer and use it in GitHub Desktop.
import pandas as pd, os, shutil
import requests
from bs4 import BeautifulSoup
# Root of the Springer site; the hrefs scraped from a book page are
# site-relative, so they are appended to this prefix.
SPRINGER_BASE_URL = "https://link.springer.com"

# Seconds to wait for the server before giving up on a request, so one stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 60


def build_file_name(title, edition):
    """Return the PDF base name (no extension) for a book.

    The name is ``"<title>_<edition>"`` with every ``/`` and ``:`` replaced
    by ``-`` so the result cannot be split into extra path components.
    """
    return f"{title}_{edition}".replace('/', '-').replace(':', '-')


def pick_pdf_href(hrefs):
    """Return the first href containing ``"/content"``, or ``None``.

    ``hrefs`` is any iterable of href values. Entries that are ``None`` or
    empty (anchors without an ``href`` attribute) are skipped instead of
    raising ``TypeError`` on the substring test.
    """
    for href in hrefs:
        if href and "/content" in href:
            return href
    return None


def main():
    """Download every book listed in the spreadsheet into ``download/<package>/``.

    Reads ``Free+English+textbooks.xlsx`` from the current directory, creates
    one folder per "English Package Name", then for each row fetches the
    "OpenURL" page, locates the PDF link and saves it as
    ``<Book Title>_<Edition>.pdf``. Rows that cannot be fetched, or whose
    page has no PDF link, are reported and skipped so one bad row does not
    abort the remaining downloads.
    """
    df = pd.read_excel("Free+English+textbooks.xlsx")
    download_folder = os.path.join(os.getcwd(), "download")

    for cat in df["English Package Name"].unique():
        folderpath = os.path.join(download_folder, str(cat))
        if not os.path.exists(folderpath):
            os.makedirs(folderpath)
            print('Created folder:', folderpath)

    for _, row in df.iterrows():
        category = row.loc["English Package Name"]
        file_name = build_file_name(row.loc['Book Title'], row.loc['Edition'])
        downloaded_name = os.path.join(
            download_folder, str(category), f"{file_name}.pdf"
        )

        book_url = row.loc['OpenURL']
        try:
            response = requests.get(book_url, timeout=REQUEST_TIMEOUT)
            response.raise_for_status()
        except requests.RequestException as err:
            print(f"[WARN] Could not open {book_url}: {err}")
            continue

        soup = BeautifulSoup(response.text, 'html.parser')
        # Resolved afresh for every row: a page without a PDF link must not
        # fall back to the URL found for the previous book.
        pdf_href = pick_pdf_href(
            link.get('href') for link in soup.find_all('a')
        )
        if pdf_href is None:
            print(f"[WARN] No PDF link found for: {file_name}")
            continue

        print(f"[INFO] Downloading to: {downloaded_name}")
        try:
            myfile = requests.get(
                SPRINGER_BASE_URL + pdf_href, timeout=REQUEST_TIMEOUT
            )
            # Without this an HTML error page would be written out as a .pdf.
            myfile.raise_for_status()
        except requests.RequestException as err:
            print(f"[WARN] Download failed for {file_name}: {err}")
            continue

        # The context manager closes the handle even if the write fails.
        with open(downloaded_name, 'wb') as pdf_file:
            pdf_file.write(myfile.content)
        print(f"[INFO] Download completed: {downloaded_name}")


if __name__ == "__main__":
    main()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment