Skip to content

Instantly share code, notes, and snippets.

@vinovator
Last active July 28, 2023 05:03

Revisions

  1. vinovator revised this gist Jun 24, 2016. 1 changed file with 18 additions and 12 deletions.
    30 changes: 18 additions & 12 deletions UdacityDownload.py
    Original file line number Diff line number Diff line change
    @@ -17,11 +17,12 @@

    # Parent directory under which all content to be downloaded
    # ../../ notation will take 2 levels up
    base_dir = "../../Vinoth/Udacity/"
    base_dir = "../../Vinoth/MOOC/Udacity/"

    # Udacity id of the courses of interest
    # Configure this list to download multiple course content
    course_lst = ["ud617", "ud359"]
    # course_lst = ["ud617", "ud359"]
    course_lst = ["ud675", "ud741", "ud820"]

    # Dict to map course id with course name
    courses = dict()
    @@ -45,8 +46,9 @@

    print("***Downloading {}".format(courses[course_id]))

    # Construct the path to download content
    download_dir = os.path.join(base_dir, courses[course_id])
    # Construct the path to download content, and strip unwanted characters
    download_dir = os.path.join(base_dir, courses[course_id].replace(":", ""))
    print download_dir

    if not os.path.exists(download_dir):
    os.makedirs(download_dir)
    @@ -82,18 +84,22 @@
    os.path.join(download_dir, name)))

    # Unzip the file and save in current location
    with zipfile.ZipFile(
    os.path.join(download_dir, name), "r") as zfile:
    # Save the extracted file under a folder with same name
    # .zip extension is stripped out to get the folder name
    zfile.extractall(
    os.path.join(download_dir, name.split(".")[0]))
    print("Zip file extracted")
    try:
    with zipfile.ZipFile(
    os.path.join(download_dir, name), "r") as zfile:
    # Save the extracted file under a folder with same name
    # .zip extension is stripped out to get the folder name
    zfile.extractall(
    os.path.join(download_dir, name.split(".")[0]))
    print("Zip file extracted")
    except Exception as e:
    print (e)
    continue

    # After extraction remove original zip file
    try:
    os.remove(os.path.join(download_dir, name))
    print("Zip file deleted")
    except Exception as e:
    print (e)
    continue
    continue
  2. vinovator revised this gist Jan 14, 2016. 1 changed file with 8 additions and 5 deletions.
    13 changes: 8 additions & 5 deletions UdacityDownload.py
    Original file line number Diff line number Diff line change
    @@ -16,6 +16,7 @@
    import os

    # Parent directory under which all content to be downloaded
    # ../../ notation will take 2 levels up
    base_dir = "../../Vinoth/Udacity/"

    # Udacity id of the courses of interest
    @@ -44,8 +45,7 @@

    print("***Downloading {}".format(courses[course_id]))

    # Relative path of download directory
    # ../../ notation will take 3 levels up
    # Construct the path to download content
    download_dir = os.path.join(base_dir, courses[course_id])

    if not os.path.exists(download_dir):
    @@ -78,13 +78,16 @@
    for chunk in vresp.iter_content(chunk_size=1024):
    if chunk:
    video.write(chunk)
    print("Video zip files downloaded in {}".format(os.path.join(download_dir, name)))
    print("Video zip files downloaded in {}".format(
    os.path.join(download_dir, name)))

    # Unzip the file and save in current location
    with zipfile.ZipFile(os.path.join(download_dir, name), "r") as zfile:
    with zipfile.ZipFile(
    os.path.join(download_dir, name), "r") as zfile:
    # Save the extracted file under a folder with same name
    # .zip extension is stripped out to get the folder name
    zfile.extractall(os.path.join(download_dir, name.split(".")[0]))
    zfile.extractall(
    os.path.join(download_dir, name.split(".")[0]))
    print("Zip file extracted")

    # After extraction remove original zip file
  3. vinovator created this gist Jan 14, 2016.
    96 changes: 96 additions & 0 deletions UdacityDownload.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,96 @@
    # UdacityDownload.py
    # Python 2.7.6

    """
    Python script to download course content from udacity courses
    - Creates folders as per course names
    - Downloads all the zip files
    - Extracts content from each zip file
    - Finally deletes the zip file
    Multiple course content can be downloaded from list

    A failure on a single course page or a single archive (HTTP error,
    unwritable file name, corrupt zip) is printed and skipped so that the
    remaining downloads still run.
    """

    import requests
    from BeautifulSoup import BeautifulSoup
    import zipfile
    import os

    # Parent directory under which all content is downloaded.
    # The leading ../../ climbs two levels up from the working directory.
    base_dir = "../../Vinoth/Udacity/"

    # Udacity id of the courses of interest
    # Configure this list to download multiple course content
    course_lst = ["ud617", "ud359"]

    # Dict to map course id with course name
    courses = dict()

    # The catalog page lists out all courses available for download
    # Use this page to extract course name from course id
    catalog_url = "https://www.udacity.com/wiki/downloads"

    catalog_resp = requests.get(catalog_url)
    # Nothing can be resolved without the catalog, so fail loudly here
    # instead of parsing an error page and silently downloading nothing.
    catalog_resp.raise_for_status()

    catalog_soup = BeautifulSoup(catalog_resp.content)

    # Lower-case the wanted ids once; the comparison is case insensitive
    wanted_ids = set(course.lower() for course in course_lst)

    lis = catalog_soup.findAll("li")
    for li in lis:
        for a in li("a"):
            if a.getText().lower() in wanted_ids:
                courses[a.getText()] = li.getText()

    for course_id in courses:

        print("***Downloading {}".format(courses[course_id]))

        # Construct the path to download content. Colons (common in course
        # titles) are not valid in directory names on every platform.
        download_dir = os.path.join(
            base_dir, courses[course_id].replace(":", ""))

        if not os.path.exists(download_dir):
            os.makedirs(download_dir)

        # ud359 is the id for "Intro to Data Science" course
        # Full download catalog - https://www.udacity.com/wiki/downloads
        course_url = "https://www.udacity.com/wiki/" + course_id + "/downloads"

        resp = requests.get(course_url)

        print("Opening url {}".format(course_url))

        if not resp.ok:
            # Skip this course only; the other courses may still be fine
            print("Could not open {} (HTTP {})".format(
                course_url, resp.status_code))
            continue

        soup = BeautifulSoup(resp.content)

        # Video links are within a <li> tag
        lis = soup.findAll("li")

        for li in lis:
            for a in li("a"):
                link = a.get("href")  # Video download link

                # Filter for zip files; anchors without href yield None
                if not link or not link.endswith(".zip"):
                    continue

                name = a.getText()  # Name of file
                print("{}: {}".format(name, link))

                zip_path = os.path.join(download_dir, name)

                try:
                    # stream=True keeps the archive out of memory; the body
                    # is pulled chunk by chunk in the loop below
                    vresp = requests.get(link, stream=True)
                    vresp.raise_for_status()

                    with open(zip_path, "wb") as video:
                        for chunk in vresp.iter_content(chunk_size=1024):
                            if chunk:
                                video.write(chunk)
                except (requests.RequestException, IOError, OSError) as e:
                    print(e)
                    continue

                print("Video zip files downloaded in {}".format(zip_path))

                # Unzip the file and save in current location
                try:
                    with zipfile.ZipFile(zip_path, "r") as zfile:
                        # Save the extracted files under a folder with the
                        # same name; only the final extension is stripped so
                        # names containing other dots stay distinct
                        zfile.extractall(os.path.join(
                            download_dir, os.path.splitext(name)[0]))
                    print("Zip file extracted")
                except (zipfile.BadZipfile, IOError, OSError) as e:
                    # Keep the zip on disk for inspection and move on
                    print(e)
                    continue

                # After extraction remove original zip file
                try:
                    os.remove(zip_path)
                    print("Zip file deleted")
                except OSError as e:
                    print(e)