Emmunaf · May 3, 2020 18:53
diff --git a/book_download_category.py b/book_download_category.py
 import pandas as pd, os,  shutil
 import requests
 from bs4 import BeautifulSoup

 # Load the textbook spreadsheet that drives the rest of the script.
 df = pd.read_excel("Free+English+textbooks.xlsx")

 # Downloads are stored in a "download/" directory under the working directory.
 currentpath = os.getcwd()
 download_folder = os.path.join(currentpath, "download/")

 # Create one sub-folder per distinct package name; folders that already
 # exist are left alone and not reported.
 for cat in df["English Package Name"].unique():
     folderpath = os.path.join(currentpath, "download/" + str(cat))
     if os.path.exists(folderpath):
         continue
     os.makedirs(folderpath)
     print('Created folder:', folderpath)
    
 # Seconds a single HTTP request may stall before it is abandoned; without a
 # timeout requests.get() can block indefinitely on an unresponsive server.
 REQUEST_TIMEOUT = 60

 # For every spreadsheet row: fetch the page behind its OpenURL, take the first
 # anchor whose href contains "/content", and save that resource as
 # <download_folder><English Package Name>/<Book Title>_<Edition>.pdf.
 # Rows that cannot be fetched or have no matching link are reported and
 # skipped so one bad row does not abort the whole batch.
 for index, row in df.iterrows():
     category = row.loc["English Package Name"]
     # '/' would be read as a path separator; ':' is presumably replaced for
     # Windows compatibility.
     file_name = f"{row.loc['Book Title']}_{row.loc['Edition']}".replace('/', '-').replace(':', '-')
     downloaded_name = f"{download_folder}{category}/{file_name}.pdf"

     # OpenURL
     download_url = row.loc['OpenURL']
     try:
         response = requests.get(download_url, timeout=REQUEST_TIMEOUT)
         response.raise_for_status()
     except requests.RequestException as err:
         print(f"[WARN] Could not open book page, skipping {file_name}: {err}")
         continue

     soup = BeautifulSoup(response.text, 'html.parser')
     # Reset on every row so a page without a match can neither reuse the
     # previous book's URL nor raise NameError on the first row.
     download_pdfurl = None
     # href=True skips anchors without an href attribute; for those,
     # link.get('href') is None and the "in" test would raise TypeError.
     for link in soup.find_all('a', href=True):
         href = link['href']
         if "/content" in href:
             download_pdfurl = "https://link.springer.com" + href
             break
     if download_pdfurl is None:
         print(f"[WARN] No PDF link found, skipping: {file_name}")
         continue

     print(f"[INFO] Downloading to: {downloaded_name}")
     try:
         myfile = requests.get(download_pdfurl, timeout=REQUEST_TIMEOUT)
         # Do not write an HTTP error page to disk under a .pdf name.
         myfile.raise_for_status()
     except requests.RequestException as err:
         print(f"[WARN] Download failed, skipping {file_name}: {err}")
         continue
     # The context manager closes the file even if the write fails.
     with open(downloaded_name, 'wb') as pdf_file:
         pdf_file.write(myfile.content)
     print(f"[INFO] Download completed: {downloaded_name}")
	import pandas as pd, os, shutil
	import requests
	from bs4 import BeautifulSoup

	# Load the textbook spreadsheet that drives the rest of the script.
	df = pd.read_excel("Free+English+textbooks.xlsx")

	# Downloads are stored in a "download/" directory under the working directory.
	currentpath = os.getcwd()
	download_folder = os.path.join(str(currentpath), str("download/"))

	# Create one sub-folder per distinct package name. The loop and "if" bodies
	# are indented again here; the flattened text was not valid Python.
	for cat in df["English Package Name"].unique():
	    folderpath = os.path.join(str(currentpath), str("download/" + str(cat)))
	    if not os.path.exists(folderpath):
	        os.makedirs(folderpath)
	        print('Created folder:', folderpath)

	# Seconds a single HTTP request may stall before it is abandoned; without a
	# timeout requests.get() can block indefinitely on an unresponsive server.
	REQUEST_TIMEOUT = 60

	# For every spreadsheet row: fetch the page behind its OpenURL, take the first
	# anchor whose href contains "/content", and save that resource as
	# <download_folder><English Package Name>/<Book Title>_<Edition>.pdf.
	# Rows that cannot be fetched or have no matching link are reported and
	# skipped so one bad row does not abort the whole batch.
	for index, row in df.iterrows():
	    category = row.loc["English Package Name"]
	    # '/' would be read as a path separator; ':' is presumably replaced for
	    # Windows compatibility.
	    file_name = f"{row.loc['Book Title']}_{row.loc['Edition']}".replace('/', '-').replace(':', '-')
	    downloaded_name = f"{download_folder}{category}/{file_name}.pdf"

	    # OpenURL
	    download_url = row.loc['OpenURL']
	    try:
	        response = requests.get(download_url, timeout=REQUEST_TIMEOUT)
	        response.raise_for_status()
	    except requests.RequestException as err:
	        print(f"[WARN] Could not open book page, skipping {file_name}: {err}")
	        continue

	    soup = BeautifulSoup(response.text, 'html.parser')
	    # Reset on every row so a page without a match can neither reuse the
	    # previous book's URL nor raise NameError on the first row.
	    download_pdfurl = None
	    # href=True skips anchors without an href attribute; for those,
	    # link.get('href') is None and the "in" test would raise TypeError.
	    for link in soup.find_all('a', href=True):
	        href = link['href']
	        if "/content" in href:
	            download_pdfurl = "https://link.springer.com" + href
	            break
	    if download_pdfurl is None:
	        print(f"[WARN] No PDF link found, skipping: {file_name}")
	        continue

	    print(f"[INFO] Downloading to: {downloaded_name}")
	    try:
	        myfile = requests.get(download_pdfurl, timeout=REQUEST_TIMEOUT)
	        # Do not write an HTTP error page to disk under a .pdf name.
	        myfile.raise_for_status()
	    except requests.RequestException as err:
	        print(f"[WARN] Download failed, skipping {file_name}: {err}")
	        continue
	    # The context manager closes the file even if the write fails.
	    with open(downloaded_name, 'wb') as pdf_file:
	        pdf_file.write(myfile.content)
	    print(f"[INFO] Download completed: {downloaded_name}")
No results found