#!/usr/bin/python
# encoding: utf-8

# Based off the script from https://gist.github.com/Paaskehare/3949299
# @author: Ash Ramesh (27/10/2016)

import cookielib
import urllib
import urllib2
import re
import os.path

# Auth details
# The credentials can be supplied through the DAS_EMAIL / DAS_PASSWORD
# environment variables so that real secrets do not have to be written into
# this file.  When a variable is not set, the placeholder below is used, so
# editing the placeholders in place keeps working as before.
email    = os.environ.get('DAS_EMAIL', 'ADD EMAIL HERE')
password = os.environ.get('DAS_PASSWORD', 'ADD PASSWORD HERE')

# Setup urllib
# Install a global opener with a cookie jar so that every later
# urllib2.urlopen() call sends back the cookies received from earlier
# responses (including the sign-in request).
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
urllib2.install_opener(opener)

# Base url for DAS (ends with a slash; relative paths are appended to it)
BASE_URL = 'https://www.destroyallsoftware.com/'

def login():
    """Sign in with the module-level ``email`` and ``password``.

    Fetches the sign-in page, extracts the hidden ``authenticity_token`` form
    field and posts it back together with the credentials.

    Returns:
        The body of the response to the sign-in POST.  The body is returned
        as-is; this function does not inspect it to verify that the sign-in
        succeeded.

    Raises:
        RuntimeError: if no authenticity token is found on the sign-in page.
    """
    # Get the sign in page
    url = BASE_URL + 'screencasts/users/sign_in'
    response = urllib2.urlopen(url)
    try:
        page = response.read()
    finally:
        response.close()

    # Get the auth token.  Fail with an explicit message instead of the
    # AttributeError that calling .group() on a failed match would produce.
    match = re.search(
        r'<input type="hidden" name="authenticity_token" value="([\w/\+=]+?)" />',
        page)
    if match is None:
        raise RuntimeError(
            'Could not find the authenticity token on the sign in page: ' + url)
    token = match.group(1)

    # Build the form data
    values = {
        'utf8':               '✓',
        'authenticity_token': token,
        'user[email]':        email,
        'user[password]':     password,
        'commit': 'Sign in'
    }

    # Call the form with sign in (passing data makes the request a POST)
    data = urllib.urlencode(values)
    req = urllib2.Request(url, data)

    response = urllib2.urlopen(req)
    try:
        return response.read()
    finally:
        response.close()

def get_catalog_urls():
    """Return the relative url of every episode linked from the catalog page.

    Each entry looks like '/screencasts/catalog/clarity-via-isolated-tests'.
    The list is in the reverse of the order in which the episodes appear in
    the catalog page.
    """
    catalog_html = urllib2.urlopen(BASE_URL + 'screencasts/catalog').read()

    # One capture group, so findall yields the href strings themselves.
    episode_links = re.findall(r'<div class="episode">\s+<a href="(.*?)">', catalog_html)
    episode_links.reverse()
    return episode_links

def download_all_screencasts(relative_url_list, chunk_size=64 * 1024):
    """Download every screencast in ``relative_url_list`` to the current directory.

    Each entry is a relative url such as
    '/screencasts/catalog/clarity-via-isolated-tests'.  Its last path segment
    becomes the file name ('clarity-via-isolated-tests.mov').  Screencasts
    whose '.mov' file already exists are skipped, as are screencasts whose
    page contains no video source url.

    The video is streamed into '<name>.mov.part' and renamed to '<name>.mov'
    only after the whole response has been read, so an interrupted run does
    not leave a truncated '.mov' that a later run would treat as already
    downloaded.

    Args:
        relative_url_list: iterable of relative screencast urls.
        chunk_size: number of bytes requested per read while streaming.
    """
    for screencast_url in relative_url_list:

        # Build filename - e.g. clarity-via-isolated-tests
        filename = screencast_url.split('/')[-1]
        full_filename = filename + '.mov'

        if os.path.exists(full_filename):
            print('Already downloaded: ' + filename + ' skipping ...')
            continue

        print('Attempting to retrieve and download ' + filename)

        # Go to the screencast page and get the source url for the video.
        # BASE_URL already ends with a slash, so drop any leading one here.
        url = BASE_URL + screencast_url.lstrip('/')
        page_response = urllib2.urlopen(url)
        try:
            page = page_response.read()
        finally:
            page_response.close()
        download_urls = re.findall('source.src = "(.*?)"', page)

        if not download_urls:
            # Keep going with the remaining screencasts instead of aborting
            # the whole run with an IndexError.
            print('No video source found for "' + filename + '" skipping ...')
            continue

        # Download the video & save to file
        print('Downloading "' + filename + '" ...')
        partial_filename = full_filename + '.part'
        response = urllib2.urlopen(urllib2.Request(download_urls[0]))
        try:
            # Open the file once ('wb' discards any stale partial download)
            # rather than reopening it for every chunk.
            with open(partial_filename, 'wb') as f:
                while True:
                    data = response.read(chunk_size)
                    if not data:
                        break
                    f.write(data)
        finally:
            response.close()

        # Only a completely read response gets the final name.
        os.rename(partial_filename, full_filename)

def main():
    """Sign in, fetch the catalog, then download every screencast in it."""
    # The sign-in response body is not used here, so it is not kept.
    login()
    catalog_urls = get_catalog_urls()
    download_all_screencasts(catalog_urls)

# Only start signing in and downloading when executed as a script, not when
# this module is imported.
if __name__ == '__main__':
    main()