vinovator · July 28, 2023 05:03 · Jun 24, 2016 · Jan 14, 2016 · Jan 14, 2016
diff --git a/UdacityDownload.py b/UdacityDownload.py
@@ -17,11 +17,12 @@
 
 # Parent directory under which all content to be downloaded
 # ../../ notation will take 3 levels up
-base_dir = "../../Vinoth/Udacity/"
+base_dir = "../../Vinoth/MOOC/Udacity/"
 
 # Udacity id of the courses of interest
 # Configure this list to download multiple course content
-course_lst = ["ud617", "ud359"]
+# course_lst = ["ud617", "ud359"]
+course_lst = ["ud675", "ud741", "ud820"]
 
 # Dict to map course id with course name
 courses = dict()
@@ -45,8 +46,9 @@
 
     print("***Downloading {}".format(courses[course_id]))
 
-    # Construct the path to download content
-    download_dir = os.path.join(base_dir, courses[course_id])
+    # Construct the path to download content, stripping colons from the name
+    download_dir = os.path.join(base_dir, courses[course_id].replace(":", ""))
+    print download_dir
 
     if not os.path.exists(download_dir):
         os.makedirs(download_dir)
@@ -82,18 +84,22 @@
                     os.path.join(download_dir, name)))
 
                 # Unzip the file and save in current location
-                with zipfile.ZipFile(
-                        os.path.join(download_dir, name), "r") as zfile:
-                    # Save the extracted file under a folder with same name
-                    # .zip extension is striped out get folder name
-                    zfile.extractall(
-                        os.path.join(download_dir, name.split(".")[0]))
-                print("Zip file extracted")
+                try:
+                    with zipfile.ZipFile(
+                            os.path.join(download_dir, name), "r") as zfile:
+                        # Save the extracted file under a folder with same name
+                        # .zip extension is stripped to get the folder name
+                        zfile.extractall(
+                            os.path.join(download_dir, name.split(".")[0]))
+                    print("Zip file extracted")
+                except Exception as e:
+                    print (e)
+                    continue
 
                 # After extraction remove original zip file
                 try:
                     os.remove(os.path.join(download_dir, name))
                     print("Zip file deleted")
                 except Exception as e:
                     print (e)
-                    continue
+                    continue
diff --git a/UdacityDownload.py b/UdacityDownload.py
@@ -16,6 +16,7 @@
 import os
 
 # Parent directory under which all content to be downloaded
+# ../../ notation will take 3 levels up
 base_dir = "../../Vinoth/Udacity/"
 
 # Udacity id of the courses of interest
@@ -44,8 +45,7 @@
 
     print("***Downloading {}".format(courses[course_id]))
 
-    # Relative path of download directory
-    # ../../ notation will take 3 levels up
+    # Construct the path to download content
     download_dir = os.path.join(base_dir, courses[course_id])
 
     if not os.path.exists(download_dir):
@@ -78,13 +78,16 @@
                     for chunk in vresp.iter_content(chunk_size=1024):
                         if chunk:
                             video.write(chunk)
-                print("Video zip files downloaded in {}".format(os.path.join(download_dir, name)))
+                print("Video zip files downloaded in {}".format(
+                    os.path.join(download_dir, name)))
 
                 # Unzip the file and save in current location
-                with zipfile.ZipFile(os.path.join(download_dir, name), "r") as zfile:
+                with zipfile.ZipFile(
+                        os.path.join(download_dir, name), "r") as zfile:
                     # Save the extracted file under a folder with same name
                     # .zip extension is striped out get folder name
-                    zfile.extractall(os.path.join(download_dir, name.split(".")[0]))
+                    zfile.extractall(
+                        os.path.join(download_dir, name.split(".")[0]))
                 print("Zip file extracted")
 
                 # After extraction remove original zip file

diff --git a/UdacityDownload.py b/UdacityDownload.py
@@ -0,0 +1,96 @@
+# UdacityDownload.py
+# Python 2.7.6
+
+"""
+Python script to download course content from udacity courses
+- Creates folders as per course names
+- Downloads all the zip files
+- Extract content from zip file
+- Finally delete the zip file
+Multiple course content can be downloaded from list
+"""
+
+import requests
+from BeautifulSoup import BeautifulSoup
+import zipfile
+import os
+
+# Parent directory under which all content to be downloaded
+base_dir = "../../Vinoth/Udacity/"
+
+# Udacity id of the courses of interest
+# Configure this list to download multiple course content
+course_lst = ["ud617", "ud359"]
+
+# Dict to map course id with course name
+courses = dict()
+
+# The catalog page lists out all courses available for download
+# Use this page to extract course name from course id
+catalog_url = "https://www.udacity.com/wiki/downloads"
+
+catalog_resp = requests.get(catalog_url)
+
+catalog_soup = BeautifulSoup(catalog_resp.content)
+
+lis = catalog_soup.findAll("li")
+for li in lis:
+    for a in li("a"):
+        # case insensitive comparison
+        if a.getText().lower() in [course.lower() for course in course_lst]:
+            courses[a.getText()] = li.getText()
+
+for course_id in courses:
+
+    print("***Downloading {}".format(courses[course_id]))
+
+    # Relative path of download directory
+    # ../../ notation will take 3 levels up
+    download_dir = os.path.join(base_dir, courses[course_id])
+
+    if not os.path.exists(download_dir):
+        os.makedirs(download_dir)
+
+    # ud359 is the id for "Intro to Data Science" course
+    # Full download catalog - https://www.udacity.com/wiki/downloads
+    course_url = "https://www.udacity.com/wiki/" + course_id + "/downloads"
+
+    resp = requests.get(course_url)
+
+    print("Opening url {}".format(course_url))
+
+    soup = BeautifulSoup(resp.content)
+
+    # Video links are within a <li> tag
+    lis = soup.findAll("li")
+
+    for li in lis:
+        for a in li("a"):
+            # Filter for zip files
+            if(a.get("href").split(".")[-1] == "zip"):
+                link = a.get("href")  # Video download link
+                name = a.getText()  # Name of file
+                print ("{}: {}".format(name, link))
+
+                vresp = requests.get(link)
+
+                with open(os.path.join(download_dir, name), "wb") as video:
+                    for chunk in vresp.iter_content(chunk_size=1024):
+                        if chunk:
+                            video.write(chunk)
+                print("Video zip files downloaded in {}".format(os.path.join(download_dir, name)))
+
+                # Unzip the file and save in current location
+                with zipfile.ZipFile(os.path.join(download_dir, name), "r") as zfile:
+                    # Save the extracted file under a folder with same name
+                    # .zip extension is striped out get folder name
+                    zfile.extractall(os.path.join(download_dir, name.split(".")[0]))
+                print("Zip file extracted")
+
+                # After extraction remove original zip file
+                try:
+                    os.remove(os.path.join(download_dir, name))
+                    print("Zip file deleted")
+                except Exception as e:
+                    print (e)
+                    continue