#!/usr/bin/python
# encoding: utf-8
"""Download every screencast in the Destroy All Software catalog.

The script signs in with the credentials configured below, scrapes the
catalog page for screencast links and saves each video as ``<slug>.mov``.
Screencasts whose ``.mov`` file already exists are skipped.
"""
# Based off the script from https://gist.github.com/Paaskehare/3949299
# @author: Ash Ramesh (27/10/2016)

import os
import os.path
import re
import shutil

try:  # Python 2
    import cookielib
    import urllib2
    from urllib import urlencode
except ImportError:  # Python 3: same APIs under their new module names
    import http.cookiejar as cookielib
    import urllib.request as urllib2
    from urllib.parse import urlencode

# Auth details
email = 'ADD EMAIL HERE'
password = 'ADD PASSWORD HERE'

# Setup urllib: install a cookie-aware opener so that the session created
# by login() is reused by every later urlopen() call.
opener = urllib2.build_opener(
    urllib2.HTTPCookieProcessor(cookielib.CookieJar()))
urllib2.install_opener(opener)

# Base url for DAS
BASE_URL = 'https://www.destroyallsoftware.com/'

# Hidden CSRF field of the sign-in form.
_TOKEN_RE = re.compile(
    r'<input type="hidden" name="authenticity_token" '
    r'value="([\w/\+=]+?)" />')
# Link to a single screencast on the catalog page.
_EPISODE_RE = re.compile(r'<div class="episode">\s+<a href="(.*?)">')
# Assignment of the video source url on a screencast page.
_SOURCE_RE = re.compile(r'source\.src = "(.*?)"')

# Number of bytes copied per read while saving a video.
_CHUNK_SIZE = 64 * 1024


def _read_url(request):
    """Open *request* (a url or Request object) and return the raw body.

    The response is always closed, even when reading fails.
    """
    response = urllib2.urlopen(request)
    try:
        return response.read()
    finally:
        response.close()


def _to_text(raw):
    """Return *raw* as a native string usable with the str regexes above.

    Under Python 2 the body already is a ``str`` and is returned untouched;
    under Python 3 the bytes are decoded as UTF-8 (undecodable bytes are
    replaced rather than raising).
    """
    if isinstance(raw, str):
        return raw
    return raw.decode('utf-8', 'replace')


def _download_to_file(source_url, target_path):
    """Stream *source_url* into *target_path*.

    The data is first written to ``target_path + '.part'`` and renamed once
    the transfer completed, so an interrupted download never leaves behind
    a file that looks finished.
    """
    partial_path = target_path + '.part'
    response = urllib2.urlopen(urllib2.Request(source_url))
    try:
        # 'wb' restarts any stale partial file from a previous run.
        with open(partial_path, 'wb') as out:
            shutil.copyfileobj(response, out, _CHUNK_SIZE)
    finally:
        response.close()
    os.rename(partial_path, target_path)


def login():
    """Sign in with the configured credentials and return the response body.

    Raises:
        RuntimeError: if the sign-in page contains no authenticity token.
    """
    # Get the sign in page
    url = BASE_URL + 'screencasts/users/sign_in'
    page = _to_text(_read_url(url))

    # Get the auth token
    match = _TOKEN_RE.search(page)
    if match is None:
        raise RuntimeError(
            'Could not find the authenticity token on ' + url)

    # Build the form data
    values = {
        'utf8': '✓',
        'authenticity_token': match.group(1),
        'user[email]': email,
        'user[password]': password,
        'commit': 'Sign in',
    }

    # Call the form with sign in
    data = urlencode(values)
    if not isinstance(data, bytes):
        # Python 3 requires the POST body to be bytes; urlencode output is
        # percent-encoded and therefore pure ASCII.
        data = data.encode('ascii')
    return _read_url(urllib2.Request(url, data))


def get_catalog_urls():
    """Get a list of relative urls to each video in the catalog.

    Urls look like '/screencasts/catalog/clarity-via-isolated-tests'.
    The list is returned in the reverse of the order used on the page.
    """
    page = _to_text(_read_url(BASE_URL + 'screencasts/catalog'))
    return _EPISODE_RE.findall(page)[::-1]


def download_all_screencasts(relative_url_list, base_url=BASE_URL,
                             dest_dir=os.curdir):
    """Download every screencast in *relative_url_list*.

    Args:
        relative_url_list: relative urls as returned by get_catalog_urls().
        base_url: prefix joined with each relative url (whose leading '/'
            is dropped) to build the screencast page url.
        dest_dir: directory the ``.mov`` files are written to.

    A screencast is skipped when its ``.mov`` file already exists or when
    its page contains no video source url.
    """
    for screencast_url in relative_url_list:
        # Build filename - e.g. clarity-via-isolated-tests
        filename = screencast_url.split('/')[-1]
        target_path = os.path.join(dest_dir, filename + '.mov')

        if os.path.exists(target_path):
            print('Already downloaded: ' + filename + ' skipping ...')
            continue
        print('Attempting to retrieve and download ' + filename)

        # Go to the screencast page and get the source url for the video
        url = base_url + screencast_url[1:]  # Remove / from the relative url
        download_urls = _SOURCE_RE.findall(_to_text(_read_url(url)))
        if not download_urls:
            print('No video source found for "' + filename
                  + '" (is the login valid?) skipping ...')
            continue

        # Download the video & save to file
        print('Downloading "' + filename + '" ...')
        _download_to_file(download_urls[0], target_path)


def main():
    """Sign in, list the catalog and download every screencast."""
    login()
    download_all_screencasts(get_catalog_urls())


if __name__ == '__main__':
    main()