Last active
July 28, 2023 05:03
Revisions
-
vinovator revised this gist
Jun 24, 2016 . 1 changed file with 18 additions and 12 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -17,11 +17,12 @@ # Parent directory under which all content to be downloaded # ../../ notation will take 3 levels up base_dir = "../../Vinoth/MOOC/Udacity/" # Udacity id of the courses of interest # Configure this list to download multiple course content # course_lst = ["ud617", "ud359"] course_lst = ["ud675", "ud741", "ud820"] # Dict to map course id with course name courses = dict() @@ -45,8 +46,9 @@ print("***Downloading {}".format(courses[course_id])) # Construct the path to download content, and strip unwanted characters download_dir = os.path.join(base_dir, courses[course_id].replace(":", "")) print download_dir if not os.path.exists(download_dir): os.makedirs(download_dir) @@ -82,18 +84,22 @@ os.path.join(download_dir, name))) # Unzip the file and save in current location try: with zipfile.ZipFile( os.path.join(download_dir, name), "r") as zfile: # Save the extracted file under a folder with same name # .zip extension is striped out get folder name zfile.extractall( os.path.join(download_dir, name.split(".")[0])) print("Zip file extracted") except Exception as e: print (e) continue # After extraction remove original zip file try: os.remove(os.path.join(download_dir, name)) print("Zip file deleted") except Exception as e: print (e) continue -
vinovator revised this gist
Jan 14, 2016 . 1 changed file with 8 additions and 5 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -16,6 +16,7 @@ import os # Parent directory under which all content to be downloaded # ../../ notation will take 3 levels up base_dir = "../../Vinoth/Udacity/" # Udacity id of the courses of interest @@ -44,8 +45,7 @@ print("***Downloading {}".format(courses[course_id])) # Construct the path to download content download_dir = os.path.join(base_dir, courses[course_id]) if not os.path.exists(download_dir): @@ -78,13 +78,16 @@ for chunk in vresp.iter_content(chunk_size=1024): if chunk: video.write(chunk) print("Video zip files downloaded in {}".format( os.path.join(download_dir, name))) # Unzip the file and save in current location with zipfile.ZipFile( os.path.join(download_dir, name), "r") as zfile: # Save the extracted file under a folder with same name # .zip extension is striped out get folder name zfile.extractall( os.path.join(download_dir, name.split(".")[0])) print("Zip file extracted") # After extraction remove original zip file -
vinovator created this gist
Jan 14, 2016 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,96 @@ # UdacityDownload.py # Python 2.7.6 """ Python script to download course content from udacity courses - Creates folders as per course names - Downloads all the zip files - Extract content from zip file - Finally delete the zip file Multiple course content can be downloaded from list """ import requests from BeautifulSoup import BeautifulSoup import zipfile import os # Parent directory under which all content to be downloaded base_dir = "../../Vinoth/Udacity/" # Udacity id of the courses of interest # Configure this list to download multiple course content course_lst = ["ud617", "ud359"] # Dict to map course id with course name courses = dict() # The catalog page lists out all courses available for download # Use this page to extract course name from course id catalog_url = "https://www.udacity.com/wiki/downloads" catalog_resp = requests.get(catalog_url) catalog_soup = BeautifulSoup(catalog_resp.content) lis = catalog_soup.findAll("li") for li in lis: for a in li("a"): # case insensitive comparison if a.getText().lower() in [course.lower() for course in course_lst]: courses[a.getText()] = li.getText() for course_id in courses: print("***Downloading {}".format(courses[course_id])) # Relative path of download directory # ../../ notation will take 3 levels up download_dir = os.path.join(base_dir, courses[course_id]) if not os.path.exists(download_dir): os.makedirs(download_dir) # ud359 is the id for "Intro to Data Science" course # Full download catalog - https://www.udacity.com/wiki/downloads course_url = "https://www.udacity.com/wiki/" + course_id + "/downloads" resp = requests.get(course_url) print("Opening url {}".format(course_url)) soup = BeautifulSoup(resp.content) # Video links are within a <li> tag lis = soup.findAll("li") for li in lis: for a in li("a"): # Filter for zip files if(a.get("href").split(".")[-1] == "zip"): link = a.get("href") # Video download link name = a.getText() # Name of file print ("{}: {}".format(name, link)) vresp = requests.get(link) with open(os.path.join(download_dir, name), "wb") as video: for chunk in vresp.iter_content(chunk_size=1024): if chunk: video.write(chunk) print("Video zip files downloaded in {}".format(os.path.join(download_dir, name))) # Unzip the file and save in current location with zipfile.ZipFile(os.path.join(download_dir, name), "r") as zfile: # Save the extracted file under a folder with same name # .zip extension is striped out get folder name zfile.extractall(os.path.join(download_dir, name.split(".")[0])) print("Zip file extracted") # After extraction remove original zip file try: os.remove(os.path.join(download_dir, name)) print("Zip file deleted") except Exception as e: print (e) continue