joeromero · September 2, 2013 22:05
diff --git a/c_common.php b/c_common.php
 <?php

 /* ===================== COMMON CODE ============================ 
   
   This file is used by two other scripts.

   2013-07-15    * bash printing functions are wrapped into class

   2013-06-29    * technical change to make c_get work

   2013-05-15, 2 * polished code for other scripts needs

   2013-05-15    * suppressed warning on lost internet connection

   2013-05-06    * generating "wget" command with all variables passed
                   from shell

   2013-05-03    * more shared code put in here

   2013-04-24    * added "chapter" phrase for removal
    
   2013-04-22    * added function for dropping a week/lecture phrase
                 * removing multiple whitespaces in a row
   
   2013-04-15    * added Coursera CSS getters and more shared code

   2013-02-15    * special function for whitespace removal

   2013-02-11    * converting character 0xc2 (?) and hard space to space

   2012-10-19    * question marks in filenames are converted to spaces 
                   (because of MS-Windows)

   2012-09-18    * initial release 

   ================================================================== */

 // ---- Coursera specific stuff ----------------------------------------

 // Spellings of the optional command-line switches. The functions below
 // look them up as keys of the $extras array.
 $split_dirs_key = '--split_dirs';
 $drop_deco_key = '--drop_week';
 $reverse_key = '--reverse';
 $beep_key = '--beep';
 $debug_key = '--debug';
 $limit_key = '--limit';

 // Overwritten by get_page_xpath() with the raw result of its latest
 // fetch (FALSE when that fetch failed).
 $debug_page = NULL;

 /**
  * Selects every <div> whose class attribute contains
  * "course-item-list-header".
  *
  * @param DOMXPath $xpath XPath evaluator bound to the page.
  * @return DOMNodeList The matching nodes.
  */
 function c_query_groups($xpath)
 {
 	$header_expr = '//div[contains(@class,"course-item-list-header")]';
 	$headers = $xpath->query($header_expr);

 	return $headers;
 }

 /**
  * Strips a leading "Week N", "Lecture N" or "Chapter N" phrase (with the
  * punctuation/whitespace that follows it) from a name.
  *
  * @param string $name Section or lecture name.
  * @return string The shortened name, or $name itself when stripping
  *                would leave nothing.
  */
 function drop_deco($name)
 {
 	$stripped = preg_replace('/^(Week|Lecture|Chapter)[\s]*\d+[.:\s\-]*/','',$name);

 	// never hand back an empty name
 	return ($stripped=='') ? $name : $stripped;
 }

 /**
  * Reads the text of the first <h3> child of a group node.
  *
  * @param DOMXPath $xpath XPath evaluator bound to the page.
  * @param DOMNode  $group Group node (see c_query_groups()).
  * @return string The heading text after coursera_trim().
  */
 function c_query_dir($xpath,$group)
 {
 	$headings = $xpath->query('./h3',$group);
 	$raw_title = $headings->item(0)->nodeValue;

 	return coursera_trim($raw_title);
 }

 /**
  * Builds a directory name of the form "NN. name".
  *
  * @param string $dir         Raw section name.
  * @param int    $group_count Number to prefix, zero-padded to two digits.
  * @param bool   $drop_deco   When true the name is passed through drop_deco() first.
  * @return string The number, ". " and the name cleaned by fix_filename().
  */
 function c_deco_dir($dir,$group_count,$drop_deco)
 {
 	$label = $drop_deco ? drop_deco($dir) : $dir;
 	$number = str_pad($group_count,2,'0',STR_PAD_LEFT);

 	return $number.'. '.fix_filename($label);
 }

 /**
  * Collects all <li> descendants of the node that directly follows a group node.
  *
  * @param DOMXPath $xpath XPath evaluator bound to the page.
  * @param DOMNode  $group Group node; its next sibling is searched.
  * @return DOMNodeList The <li> nodes found.
  */
 function c_query_list($xpath,$group)
 {
 	$container = $group->nextSibling;

 	return $xpath->query('.//li',$container);
 }

 /**
  * Finds the first "lecture-link" anchor below a list item and derives a
  * title from the anchor's first child node.
  *
  * @param DOMXPath $xpath     XPath evaluator bound to the page.
  * @param DOMNode  $node      List item to search in.
  * @param bool     $drop_deco When true the title is passed through drop_deco().
  * @param DOMNode  $row       Output: the anchor node.
  * @param string   $title     Output: title cleaned by fix_filename().
  */
 function c_query_row($xpath,$node,$drop_deco,&$row,&$title)
 {
 	$anchors = $xpath->query('.//a[contains(@class,"lecture-link")]',$node);
 	$row = $anchors->item(0);

 	$cleaned = fix_filename($row->firstChild->nodeValue);
 	$title = $drop_deco ? drop_deco($cleaned) : $cleaned;
 }


 /**
  * Loads the page named by the row's "data-modal-iframe" attribute and
  * looks up the <source> elements of the given video type in it.
  *
  * @param DOMNode     $row     Lecture anchor node.
  * @param string      $ext     Video subtype, matched as type="video/<ext>".
  * @param string|null $session Session value forwarded to get_page_xpath().
  * @return DOMNodeList|null The matching nodes (possibly none), or NULL
  *                          when the page could not be loaded.
  */
 function c_get_embedded_links($row,$ext,$session = NULL)
 {
 	$frame_url = trim($row->attributes->getNamedItem('data-modal-iframe')->nodeValue);

 	// lectures in preview mode live on external pages, so they are fetched separately
 	$player_page = get_page_xpath($frame_url,$session);
 	if (!$player_page)
 		return NULL;

 	$source_filter = 'source[@type="video/'.$ext.'"]';

 	$sources = $player_page->query('//video[@id="QL_video_element_first"]/'.$source_filter);
 	if ($sources->length===0)
 		$sources = $player_page->query('//div[@id="QL_player_container_first"]//'.$source_filter);

 	return $sources;
 }

 /*function c_get_embedded_links2($row,$ext,$session = NULL)
 {
 	$frame = trim($row->attributes->getNamedItem('data-modal-iframe')->nodeValue);
 	// lectures in preview mode are put at external pages, so we have to download them extra
 	$view = get_page_xpath($frame,$session);
 			
 	if (!$view)
 		return NULL;
 	else		
 		return $view->query('//div[@id="QL_player_container_first"]//embed[@id="me_flash_0" and ends-with(@flashvars,".'.$ext.'")]');
 }*/

 /**
  * Trims a UTF-8 string after turning every no-break space into a plain
  * space (trim() alone does not remove no-break spaces).
  *
  * Only the complete two-byte sequence C2 A0 is replaced. Replacing the
  * bytes 0xC2 and 0xA0 one by one would damage every other UTF-8
  * character that happens to contain one of them (for instance
  * "à" = C3 A0) and would turn a single no-break space into two spaces.
  *
  * @param string $s UTF-8 text.
  * @return string The text with no-break spaces converted and the ends trimmed.
  */
 function coursera_trim($s)
 {
 	return trim(str_replace("\xc2\xa0",' ',$s));
 }

 // ---- general php code -----------------------------------------------

 /**
  * Writes the lines of the generated download shell script to stdout.
  *
  * Directory creation is deferred: mkdir_print() only queues its "mkdir"
  * lines and the next wget_file_print() call writes them out, so a
  * directory is not created when nothing is downloaded into it.
  *
  * Every value taken from the outside (file names, URLs, session, log
  * name) is emitted as a single-quoted shell word, so characters such as
  * $, ` or " are never interpreted by the shell.
  */
 class BashPrinter
 {
 	// "mkdir" lines queued by mkdir_print() and not yet written out
 	private $dirLines = array();

 	/**
 	 * Quotes a value as one literal word for a POSIX shell.
 	 *
 	 * Hand-rolled on purpose: it only touches the single-quote byte, so
 	 * multi-byte file names pass through unchanged.
 	 *
 	 * @param string $value Raw value.
 	 * @return string The value in single quotes, embedded quotes escaped.
 	 */
 	private static function sh_quote($value)
 	{
 		return "'".str_replace("'","'\\''",$value)."'";
 	}

 	/**
 	 * Prints the commands that download one file unless it already exists.
 	 *
 	 * On failure the partial file is removed and the script's ERRORS
 	 * counter is incremented; on success the link is appended to $log
 	 * when a log file name is given.
 	 *
 	 * @param string      $link            URL to download.
 	 * @param string      $target_filename Path of the file to create.
 	 * @param string|null $log             Log file for finished links, or NULL for none.
 	 * @param string|null $session         Session cookie value, or NULL for none.
 	 */
 	public function wget_file_print($link,$target_filename,$log = NULL,$session = NULL)
 	{
 		// flush the directory commands queued for this section
 		foreach ($this->dirLines as $line)
 			echo $line;
 		$this->dirLines = array();

 		$q_link = self::sh_quote($link);
 		$q_target = self::sh_quote($target_filename);

 		echo 'if [ ! -e '.$q_target.' ] ; then'."\n";
 		// "$@" hands the script's own arguments to wget, each one intact
 		echo '  wget "$@" -nc --no-cookies ';
 		if ($session!==NULL)
 			echo ' --header '.self::sh_quote('Cookie: session='.$session).' ';
 		echo $q_link.' -O '.$q_target."\n";
 		echo '  if [ $? -ne 0 ]'."\n";
 		echo '  then'."\n";
 		echo '    rm -f '.$q_target.'; ERRORS=$((ERRORS+1))'."\n";
 		if ($log!==NULL)
 		{
 			echo '  else'."\n";
 			// printf writes the link verbatim; echo may interpret backslashes
 			echo '    printf \'%s\\n\' '.$q_link.' >> '.self::sh_quote($log)."\n";
 		}
 		echo '  fi'."\n";
 		echo 'fi'."\n";
 	}

 	/**
 	 * Queues the commands creating the directory (or, with the split
 	 * option, one directory per split name) for the next download.
 	 *
 	 * "mkdir -p" is used in both cases so that rerunning the script
 	 * does not complain about directories that already exist.
 	 *
 	 * @param string $dir    Section directory name.
 	 * @param array  $extras Options; the split entry must already be an array of names.
 	 */
 	public function mkdir_print($dir,$extras)
 	{
 		global $split_dirs_key;

 		$this->dirLines = array("\n");

 		if (array_key_exists($split_dirs_key,$extras))
 		{
 			foreach ($extras[$split_dirs_key] as $d)
 				$this->dirLines[] = 'mkdir -p '.self::sh_quote($d.'/'.$dir)."\n";
 		}
 		else
 			$this->dirLines[] = 'mkdir -p '.self::sh_quote($dir)."\n";
 	}
 }

 /**
  * Converts the value of the split-directories option, when present,
  * from a space-separated string into an array of names.
  *
  * @param array $extras Options keyed by switch name; modified in place.
  */
 function process_extra_arguments(&$extras)
 {
 	global $split_dirs_key;

 	if (!array_key_exists($split_dirs_key,$extras))
 		return;

 	$dir_names = explode(' ',$extras[$split_dirs_key]);
 	$extras[$split_dirs_key] = $dir_names;
 }

 /**
  * Parses UTF-8 HTML into a DOMDocument, keeping libxml's parse
  * warnings away from the output.
  *
  * @param string $content UTF-8 encoded HTML; may be empty.
  * @return DOMDocument The parsed document (an empty one for empty input).
  */
 function get_dom($content)
 {
 	$dom = new DOMDocument();

 	// Every non-ASCII character becomes a numeric entity, so the markup
 	// handed to the parser is plain ASCII. This replaces the 'HTML-ENTITIES'
 	// pseudo-encoding of mb_convert_encoding(), deprecated since PHP 8.2.
 	$content = mb_encode_numericentity($content, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

 	$errors_mode = libxml_use_internal_errors(TRUE);
 	// loadHTML() rejects an empty string (ValueError on PHP 8)
 	if ($content!=='')
 		$dom->loadHTML($content);
 	libxml_clear_errors();
 	libxml_use_internal_errors($errors_mode);
 	// NOTE(review): assigned after loading, so it presumably has no effect
 	// on how this document was parsed -- confirm before relying on it
 	$dom->preserveWhiteSpace = false;

 	return $dom;
 }

 /**
  * Makes a title usable as a file name: "?" becomes a space, ":" a
  * period, a double quote a single quote, "/" and "\" an underscore;
  * the result goes through coursera_trim() and runs of whitespace are
  * collapsed into one space.
  *
  * @param string $s Raw title.
  * @return string The cleaned title.
  */
 function fix_filename($s)
 {
 	$mapped = strtr($s,'?:"/\\',' .\'__');
 	$trimmed = coursera_trim($mapped);

 	return preg_replace('/\s{2,}/', ' ',$trimmed);
 }

 /**
  * Downloads a page with an HTTP GET and returns an XPath evaluator for it.
  *
  * The raw download result is also stored in the global $debug_page.
  *
  * @param string      $url     Address of the page.
  * @param string|null $session When given, sent as "Cookie: session=<value>;".
  * @return DOMXPath|null Evaluator for the parsed page, or NULL when the
  *                       download failed.
  */
 function get_page_xpath($url,$session = NULL)
 {
 	global $debug_page;

 	$http_options = array('method'=>'GET');
 	if ($session!==NULL)
 		$http_options['header'] = 'Cookie: session='.$session.';';

 	// warnings (e.g. on a lost connection) are suppressed; failure is reported via NULL
 	$page = @file_get_contents($url,false,stream_context_create(array('http'=> $http_options)));
 	$debug_page = $page;

 	return ($page===FALSE) ? NULL : new DOMXPath(get_dom($page));
 }



 ?>
diff --git a/c_get.php b/c_get.php
 <?php

 /* ===================== COURSERA GETTER ============================ 
   tags: [coursera video download] [coursera lecture download]
 
 CHANGELOG:
 ---------
 
   2013-08-06    * additional info from tooltip is processed to be used 
                   as a filename

   2013-07-15    * if there is nothing to download for given folder
                   it is not created

   2013-06-29    * updated extraction for embedded videos
                 * added option to limit extraction from main page     

   2013-05-15, 2 * corrected reporting failed download or extraction

   2013-05-15    * reporting failed extraction of main resource
                    
   2013-05-03    * more unification with c_preview utility -- ability to 
                   download embedded videos as well (read: webm), add 
                   "~" character before file extension to make Coursera 
                   Getter fetch embedded video
                 * "--extension" option in no longer supported -- add 
                   dot (".") before file extension instead
                     
   2013-04-22    * dropping a week/lecture phrase from filenames as well
   
   2013-04-15    * refactoring

   2013-04-07    * keeping log of downloads (file "downloads.log") as 
                   countermeasure for renaming the lectures/notes
                 * added "beep" option to make a sound at the end of 
                   downloading

   2013-04-03    * bugfix: the title of lecture sometimes was ignored

   2013-02-15    * more accurate whitespace removal

   2013-02-02    * automatically removes corrupted files

   2013-01-23    * new option "reverse" for the courses which put 
                   sections in "from newest to oldest" order 

   2013-01-08    * Coursera changed its web format, along with structure
                   and CSS tags/classes this version hopefully is
                   changed to reflect all of those

   2012-10-12    * added option --drop_week to drop "week X." part from
                   the directory (remind me this was supposed to be dead
                   simple tool ;-D)  

   2012-10-11    * added option --split_dirs to save files into 
                   subdirectories according to the file extensions  

   2012-10-03    * added option --extension to get resources by 
                   extension of the files, not the tooltips
    
   2012-09-18    * added c_common.php
                 * you can specify via filetypes what to grab and what
                   extension set
    
   2012-07-05    * initial release of Coursera preview getter

   2012-06-14    * added little control if there is insufficient number
                   of arguments

   2012-06-11    * UTF-8 in filenames are supported
                   (another module for PHP is required -- mbstring)
                 * replaces slash and backslash with underscore

   2012-06-07    * changed this info, added another way of getting 
                   cookies

   2012-06-06, 2 * extensions casing reverted -- they matter again
                 * directories are named according to Lectures sections
                 * handles multiple files for given file type
                 * files are counted within each directory, not within 
                   entire course
   
   2012-06-06    * extensions can be given lower/upper-case, they do not
                   matter
   
   2012-06-05, 2 * creates weekly subdirectories and puts the files in
                   there
   
   2012-06-05    * initial release

 WHAT IT DOES:
 ------------

  * it parses given course Lectures page 
  * it extracts all the desired content (links for videos, slides, etc)
  * it uses consistent naming of the files
  * it replaces colon with period (hello Windows users)
  * it finally creates a bunch of wget command ready to execute
  * it ignores already existing files, so it is safe to rerun wget 
    script just to get missing files (note this might be not true if you
    update this script, because of possible change in naming convention)

 WHAT YOU NEED:
 -------------
  
  1. proper shell (Windows users -- of course I recommend switching to
     Linux entirely, but as a workaround Cygwin should be fine -- I 
     don't know how about the tools I mention below)
  2. wget (in openSUSE `sudo zypper in wget`)
  3. php5 (in openSUSE `sudo zypper in php5`)
  4. php5-openssl (in openSUSE `sudo zypper in php5-openssl`)
  5. php5-mbstring (in openSUSE `sudo zypper in php5-mbstring`)
  6. and an adventurous soul -- in Firefox, go to 
     Edit/Preferences/Privacy/Remove Individual Cookie (don't freak
     out!) search for "coursera". Several items should appear -- look 
     for key session for the site you would like to download (for 
     example "nlp"). Copy the value (content) of that key. Close the 
     preferences window (do **NOT** delete anything!) -- I will be 
     grateful for info if there is easier way

 Ok, so now you know the address of the site, the session, and the files
 you would like to download.

 Jan de Vos sent another way for getting cookies (step 6):

  * find the cookies directory -- in case of Linux it will be something 
    like this `~/.mozilla/firefox/88xw1k8g.default/`
  * run sqlite3 -- `sqlite3 cookies.sqlite`
  * run SQL query -- `select path,value from moz_cookies where 
    baseDomain = 'coursera.org' and name='session';`
   
 You will get the session codes for all courses you are enrolled on.
  
 USAGE:
 -----

    php c_get.php "link_to_lectures_page" "file types" "session code" > wget_script_name.sh
    sh wget_script_name.sh

 Example (this is one line):

    php c_get.php "https://class.coursera.org/crypto/lecture/index" "MP4 PDF" "HERE&IS%MY&SESSION^VALUE@WHICH*OF!COURSE*I_WONT*TELL9YOU" > wgetter.sh

 the one above creates appropriate script for wget for downloading videos 
 (MP4) and slides (PDF). 

 Please note the file type casing (MP4 vs. mp4) must match the casing of
 the title (tooltip) of given category of files -- check the Lectures 
 page to find it out.
 
 It is possible to pass file type in format "FileFormat=FileExtension",
 so this script will look for one thing, but save it as another. For example some courses list pdf files as "Slides". In such a case pass the file format "Slides=pdf" -- this means "Slides" will be grabbed, but saved with the extension "pdf".

 Some courses do not use consistent naming of tooltips (unfortunately),
 in such case you can download files directly by extension -- add dot
 (".") character in front of file type. As previously, pay attention to 
 lowercase/uppercase (e.g. usually the extension is "mp4" but tooltip is
 "MP4"). Example:

    php c_get.php "https://class.coursera.org/scala/lecture/index" ".mp4 .pdf" "HERE&IS%MY&SESSION" > wgetter.sh
     
 Yet another source of files are embedded frames (the ones when you click
 to view lecture online). One of the advantages of this is ability to
 download video in webm format. Instead of "." use now "~", for example:

    php c_get.php "https://class.coursera.org/scala/lecture/index" "~webm .pdf" "HERE&IS%MY&SESSION" > wgetter.sh
      
 NOTE: the video will be downloaded from embedded player, but handouts
 (pdf) will be downloaded from download (resources) section.
 
 If you would like to have notes in the "notes" subdirectory and lectures
 in "lectures" one add "--split_dirs" argument in such way:

    php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --split_dirs="videos texts" > wgetter.sh

 so "mp4" files will go into "videos" subdirectory and "pdf" files into
 "texts" subdirectory.

 If the directories with opening "Week X." seem redundant add 
 "--drop_week" option:
    
    php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --drop_week > wgetter.sh

 Instead of having "02. Week 1: Functions & Evaluations" you will get
 "02. Functions & Evaluations".

 For courses which do not use natural order (from oldest to newest) there
 is an option "reverse":

    php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --reverse > wgetter.sh

 This will tell this script to use reversed order of numbering sections.
 
 The courses with embedded videos are harder to process -- extraction 
 takes more time. If you know in advance that you don't want to extract 
 some portion of the lectures you can pass the limit option:

    php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --limit="Week 9" > wgetter.sh
    
 This will start extraction from section containing phrase "Week 9". In 
 case of reversed order -- it will stop extraction on phrase "Week 9".
 
 In all above examples, video lecture (mp4/webm) came first -- the
 program assumes it is the main resource, and if it is missing it will 
 report this fact. It won't report missing resource of any other kind.
 
 Once the actual getter script is created (here: wgetter.sh) you can pass
 any extra option for "wget". For example you can run it as:

    sh wgetter.sh --limit-rate=100k

 This would limit speed of download to 100KB/s. See "man wget" for more 
 options.
 *     
 SECURITY NOTE:
 -------------

 Do NOT share your session code with anyone, and this means -- do NOT
 share the wget script with anyone as well!
 
   ================================================================== */
 
 require_once 'c_common.php';

 /**
  * Prints to stdout a shell script that downloads the requested
  * resources of every lecture on the Lectures page.
  *
  * Problems and debug information go to stderr so that they do not end
  * up inside the generated script.
  *
  * @param DOMXPath    $xpath      XPath evaluator for the Lectures page.
  * @param string|null $session    Session value handed to wget and to the embedded-frame requests.
  * @param array       $extensions File types, each "Type" or "Type=ext"; a leading "."
  *                                selects by link extension, a leading "~" by embedded player.
  * @param array       $extras     Optional switches keyed by switch name.
  */
 function print_wget($xpath,$session,$extensions,$extras)
 {
 	global $split_dirs_key,$drop_deco_key,$reverse_key,$beep_key,$debug_key,$limit_key;
 	global $debug_page;

 	$bash_printer = new BashPrinter();

 	process_extra_arguments($extras);

 	$split_dirs = array_key_exists($split_dirs_key,$extras);
 	$drop_deco = array_key_exists($drop_deco_key,$extras);
 	$reverse = array_key_exists($reverse_key,$extras);
 	$debug = array_key_exists($debug_key,$extras);
 	$has_limit = array_key_exists($limit_key,$extras);

 	// Every file type needs its own non-empty split directory. A missing
 	// one would silently produce the absolute path "/<section>".
 	if ($split_dirs)
 	{
 		foreach ($extensions as $i_ext => $ext_combo)
 		{
 			if ($ext_combo==='')
 				continue;
 			if (!isset($extras[$split_dirs_key][$i_ext]) || $extras[$split_dirs_key][$i_ext]==='')
 			{
 				file_put_contents('php://stderr', "Error: $split_dirs_key needs a directory name for every file type\n");
 				return;
 			}
 		}
 	}

 	// done with extra arguments ---------------------------------------

 	$downloads_filename = 'downloads.log';

 	// links recorded by earlier runs of the generated script, one per line
 	$downloads = array();
 	if (file_exists($downloads_filename))
 	{
 		$logged = file($downloads_filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
 		if ($logged!==FALSE)
 			$downloads = $logged;
 	}

 	echo "ERRORS=0\n";

 	$group_list = c_query_groups($xpath);

 	$group_count = $reverse ? $group_list->length : 1;

 	// in reverse order pretend the limit was NOT hit (it marks where to stop);
 	// otherwise, if no limit is given by the user, pretend it was hit already
 	$limit_hit = $reverse ? false : !$has_limit;

 	foreach ($group_list as $group)
 	{
 		$item_count = 0;

 		$dir = c_query_dir($xpath,$group);

 		if (!$limit_hit && $has_limit)
 			$limit_hit = (strpos($dir,$extras[$limit_key])!==FALSE);

 		$dir = c_deco_dir($dir,$group_count,$drop_deco);
 		$group_count += $reverse ? -1 : +1;

 		if ($has_limit)
 		{
 			if ($reverse)
 			{
 				if ($limit_hit)
 					break;
 			}
 			elseif (!$limit_hit)
 				continue;
 		}

 		$bash_printer->mkdir_print($dir,$extras);

 		$node_list = c_query_list($xpath,$group);

 		foreach ($node_list as $node)
 		{
 			++$item_count;

 			c_query_row($xpath,$node,$drop_deco,$row,$title);

 			// each $ext_combo is either "FileType" or "FileType=FileExtension" (e.g. "PDF", "Slides=pdf")
 			foreach ($extensions as $i_ext => $ext_combo)
 			{
 				$ext_parts = explode('=',$ext_combo);

 				// an empty type (e.g. from a doubled space in the list) selects nothing
 				if ($ext_parts[0]==='')
 					continue;

 				$target_dir = $split_dirs ? $extras[$split_dirs_key][$i_ext].'/'.$dir : $dir;

 				$attr_extractor = 'href';

 				if ($ext_parts[0][0]=='~') // extract link by extension from viewer frame
 				{
 					$ext_parts[0] = substr($ext_parts[0],1);

 					$links = c_get_embedded_links($row,$ext_parts[0],$session);

 					if ($links===NULL)
 					{
 						file_put_contents('php://stderr', "Loading embedded frame failed: '$dir/$title'\n");
 						continue;
 					}
 					else if ($links->length===0 && $i_ext===0)
 					{
 						// only the first (main) file type is reported as missing
 						file_put_contents('php://stderr', "No resources '$ext_parts[0]' found for '$dir/$title'\n");
 						if ($debug)
 							file_put_contents('DEBUG_'.$title,$debug_page);
 						continue;
 					}
 					else
 						$attr_extractor = 'src';

 					if ($debug)
 						file_put_contents('php://stderr', "For $dir/$title ".$links->length." '$ext_parts[0]' links found.\n");
 				}
 				else if ($ext_parts[0][0]=='.') // extract link by extension of the linked file
 				{
 					$links = $xpath->query('.//div[@class="course-lecture-item-resource"]/a[contains(@href,"'.$ext_parts[0].'")]',$node);
 					$ext_parts[0] = substr($ext_parts[0],1);
 				}
 				else // extract link by tooltip of the link
 					$links = $xpath->query('.//a[contains(@title,"'.$ext_parts[0].'")]',$node);

 				// the last part is the extension to save with; read it only now,
 				// because the "~" / "." marker has just been cut off the first part
 				$file_ext = strtolower($ext_parts[count($ext_parts)-1]);

 				$link_no = 0;
 				foreach ($links as $link)
 				{
 					++$link_no;

 					// several matches for one file type are told apart by their
 					// tooltip, or by their position when there is no tooltip
 					$suffix = '';
 					if ($links->length>1)
 					{
 						$tooltip = $link->attributes->getNamedItem('title');
 						$suffix = '.'.($tooltip!==NULL ? fix_filename($tooltip->nodeValue) : $link_no);
 					}

 					$address = $link->attributes->getNamedItem($attr_extractor);
 					if ($address===NULL)
 						continue; // nothing to download from this element

 					$url = $address->nodeValue;
 					// NOTE: nothing sets 'flashvars' at present; the branch belongs to the
 					// disabled Flash-player fallback and is kept for when that is revived
 					if ($attr_extractor=='flashvars')
 					{
 						$url_idx = strpos($url,'&file=http');
 						$url = urldecode(substr($url,$url_idx+strlen('&file=')));
 					}

 					if (!in_array($url,$downloads,true))
 					{
 						$target_filename = $target_dir.'/'.str_pad($item_count,3,'0',STR_PAD_LEFT).'. '.$title.$suffix.'.'.$file_ext;
 						$bash_printer->wget_file_print($url,$target_filename,$downloads_filename,$session);
 					}
 					else if ($debug)
 						file_put_contents('php://stderr', "$dir/$title '$ext_parts[0]' already downloaded.\n");
 				}
 			}
 		}
 	}

 	echo "\n";
 	echo 'if [ $ERRORS -ne 0 ] ; then echo "There were some errors while downloading. Run the script again." ; fi'."\n";

 	if (array_key_exists($beep_key,$extras))
 		echo "beep\n";
 }

 // ---- entry point: parse the command line, fetch the Lectures page and
 // ---- print the download script; any failure ends with exit status 1
 if ($argc<4)
 {
 	file_put_contents('php://stderr', "Error: you should input minimum three arguments, the usage is:\n");
 	// the *_key variables already contain the leading "--"
 	file_put_contents('php://stderr', "\"LECTURES_URL\" \"FILE_TYPES\" \"SESSION_CODE\" [$beep_key] [$reverse_key] [$drop_deco_key] [$debug_key] [$limit_key=\"section phrase\"] [$split_dirs_key=\"directories per file type\"]\n");
 	exit(1);
 }
 else
 {
 	array_shift($argv); // drop the script name

 	$url = array_shift($argv);
 	$extensions = explode(' ',array_shift($argv));
 	$session = array_shift($argv);

 	$known_keys = array($split_dirs_key,$drop_deco_key,$reverse_key,$beep_key,$debug_key,$limit_key);

 	$extras = array();
 	foreach ($argv as $a)
 	{
 		// split at the first "=" only, so a value may itself contain "="
 		$parts = explode('=',$a,2);
 		if (!in_array($parts[0],$known_keys,true))
 		{
 			file_put_contents('php://stderr', 'Unknown extra argument "'.$parts[0]."\"\n");
 			exit(1);
 		}

 		$extras[$parts[0]] = count($parts)==1 ? NULL : $parts[1];
 	}

 	$xpath = get_page_xpath($url,$session);
 	if ($xpath===NULL)
 	{
 		file_put_contents('php://stderr', "Error: could not load \"$url\"\n");
 		exit(1);
 	}

 	print_wget($xpath,$session,$extensions,$extras);
 }
 	
 ?>
diff --git a/c_preview.php b/c_preview.php
 <?php

 /* ===================== COURSERA PREVIEW GETTER ======================= 
   tags: [coursera video download] [coursera lecture download]

 CHANGELOG:
 ---------

   2013-07-15    * if there is nothing to download for given folder
                   it is not created

   2013-06-29    * technical change to work with c_common

   2013-05-15    * reporting failed download or extraction of main 
                   resource

   2013-05-03    * just keeping in sync with c_get

   2013-04-22    * dropping a week/lecture phrase from filenames as well
   
   2013-04-15    * update to follow last Coursera changes

   2012-10-12    * sharing code with c_common.php;
                   some fixes to follow Coursera changes of preview sites
   
   2012-07-05    * initial release

 WHAT IT DOES:
 ------------

  * it is counterpart for Coursera getter, but this one works only for course previews 
    -- the ones with embedded video player, and nothing else

 WHAT YOU NEED:
 -------------
  
  1. proper shell (Windows users -- of course I recommend switching to Linux entirely, but as a workaround Cygwin should be fine -- I don't know how about the tools I mention below)
  2. wget (in openSUSE `sudo zypper in wget`)
  3. php5 (in openSUSE `sudo zypper in php5`)
  4. php5-mbstring (in openSUSE `sudo zypper in php5-mbstring`)

 USAGE:
 -----

    php c_preview.php "link_to_preview_page" "video_file_type" > wget_script_name.sh
    sh wget_script_name.sh

 Example (this is one line):

    php c_preview.php "https://class.coursera.org/crypto-preview/lecture/index" "mp4" > wgetter.sh

 the one above creates appropriate script for wget for downloading videos (MP4). Now execute

    sh wgetter.sh

 Please note the file type is not guaranteed to exist on the server
 (so far "webm" and "mp4" are supported by Coursera).
 
   ================================================================== */
 
 require_once 'c_common.php';
 
 // https://class.coursera.org/machlearning-001/lecture/preview/index
 /**
  * Prints to stdout a shell script that downloads the embedded video of
  * every lecture on a course preview page.
  *
  * Problems are reported on stderr so that they do not end up inside
  * the generated script.
  *
  * @param DOMXPath $xpath  XPath evaluator for the preview page.
  * @param string   $ext    Video type to look for; lower-cased for the file extension.
  * @param array    $extras Optional switches keyed by switch name.
  */
 function print_wget($xpath,$ext,$extras)
 {
 	global $split_dirs_key,$drop_deco_key,$reverse_key,$beep_key;

 	$bash_printer = new BashPrinter();

 	process_extra_arguments($extras);

 	$drop_deco = array_key_exists($drop_deco_key,$extras);
 	$reverse = array_key_exists($reverse_key,$extras);

 	echo "ERRORS=0\n";

 	$group_list = c_query_groups($xpath);
 	$group_count = $reverse ? $group_list->length : 1;

 	foreach ($group_list as $group)
 	{
 		$item_count = 0;

 		$dir = c_deco_dir(c_query_dir($xpath,$group),$group_count,$drop_deco);
 		$group_count += $reverse ? -1 : +1;

 		$bash_printer->mkdir_print($dir,$extras);

 		// With the split option mkdir_print() creates "<split dir>/<section>",
 		// so the (single) video type has to be saved below the first split
 		// directory; otherwise wget would target a directory that is never created.
 		if (array_key_exists($split_dirs_key,$extras))
 			$target_dir = $extras[$split_dirs_key][0].'/'.$dir;
 		else
 			$target_dir = $dir;

 		// get the list of all lectures within current group (week)
 		$node_list = c_query_list($xpath,$group);

 		foreach ($node_list as $node)
 		{
 			++$item_count;

 			c_query_row($xpath,$node,$drop_deco,$row,$title);

 			$video_list = c_get_embedded_links($row,$ext);

 			if ($video_list===NULL)
 			{
 				file_put_contents('php://stderr', "Loading embedded frame failed: '$dir/$title'\n");
 				continue;
 			}
 			else if ($video_list->length==0)
 			{
 				file_put_contents('php://stderr', "Filetype '$ext' not found for '".$title."'\n");
 				continue;
 			}

 			// only the first matching source is downloaded
 			$src = $video_list->item(0)->attributes->getNamedItem('src');
 			if ($src===NULL)
 			{
 				file_put_contents('php://stderr', "Filetype '$ext' not found for '".$title."'\n");
 				continue;
 			}

 			$target_filename = $target_dir.'/'.str_pad($item_count,3,'0',STR_PAD_LEFT).'. '.$title.'.'.strtolower($ext);
 			$bash_printer->wget_file_print($src->nodeValue,$target_filename);
 		}
 	}

 	echo 'if [ $ERRORS -ne 0 ] ; then echo "There were some errors while downloading. Run the script again." ; fi'."\n";

 	if (array_key_exists($beep_key,$extras))
 		echo "beep\n";
 }

 // ---- entry point: parse the command line, fetch the preview page and
 // ---- print the download script; any failure ends with exit status 1
 if ($argc<3)
 {
 	file_put_contents('php://stderr', "Error: you should input minimum two arguments, the usage is:\n");
 	// the *_key variables already contain the leading "--"
 	file_put_contents('php://stderr', "\"LECTURES_URL\" \"FILE_TYPES\" [$beep_key] [$reverse_key] [$drop_deco_key] [$split_dirs_key=\"directories per file type\"]\n");
 	exit(1);
 }
 else
 {
 	array_shift($argv); // drop the script name

 	$url = array_shift($argv);
 	$extensions = array_shift($argv);

 	$known_keys = array($split_dirs_key,$drop_deco_key,$reverse_key,$beep_key);

 	$extras = array();
 	foreach ($argv as $a)
 	{
 		// split at the first "=" only, so a value may itself contain "="
 		$parts = explode('=',$a,2);
 		if (!in_array($parts[0],$known_keys,true))
 		{
 			file_put_contents('php://stderr', 'Unknown extra argument "'.$parts[0]."\"\n");
 			exit(1);
 		}

 		$extras[$parts[0]] = count($parts)==1 ? NULL : $parts[1];
 	}

 	$xpath = get_page_xpath($url);
 	if ($xpath===NULL)
 	{
 		file_put_contents('php://stderr', "Error: could not load \"$url\"\n");
 		exit(1);
 	}

 	print_wget($xpath,$extensions,$extras);
 }

 ?>
	<?php

	/* ===================== COMMON CODE ============================

	This file is used by two other scripts.

	2013-07-15 * bash printing functions are wrapped into class

	2013-06-29 * technical change to make c_get work

	2013-05-15, 2 * polished code for other scripts needs

	2013-05-15 * suppressed warning on lost internet connection

	2013-05-06 * generating "wget" command with all variables passed
	from shell

	2013-05-03 * more shared code put in here

	2013-04-24 * added "chapter" phrase for removal

	2013-04-22 * added function for dropping a week/lecture phrase
	* removing multiple whitespaces in a row

	2013-04-15 * added Coursera CSS getters and more shared code

	2013-02-15 * special function for whitespace removal

	2013-02-11 * converting character 0xc2 (?) and hard space to space

	2012-10-19 * question marks in filenames are converted to spaces
	(because of MS-Windows)

	2012-09-18 * initial release

	================================================================== */

	// ---- Coursera specific stuff ----------------------------------------

	// Spellings of the optional command-line switches. The functions below
	// look them up as keys of the $extras array.
	$split_dirs_key = '--split_dirs';
	$drop_deco_key = '--drop_week';
	$reverse_key = '--reverse';
	$beep_key = '--beep';
	$debug_key = '--debug';
	$limit_key = '--limit';

	// Overwritten by get_page_xpath() with the raw result of its latest
	// fetch (FALSE when that fetch failed).
	$debug_page = NULL;

	/**
	 * Selects every <div> whose class attribute contains
	 * "course-item-list-header".
	 *
	 * @param DOMXPath $xpath XPath evaluator bound to the page.
	 * @return DOMNodeList The matching nodes.
	 */
	function c_query_groups($xpath)
	{
		$header_expr = '//div[contains(@class,"course-item-list-header")]';
		$headers = $xpath->query($header_expr);

		return $headers;
	}

	/**
	 * Strips a leading "Week N", "Lecture N" or "Chapter N" phrase (with the
	 * punctuation/whitespace that follows it) from a name.
	 *
	 * The alternation uses plain "|" and both character classes carry a "*"
	 * quantifier. With escaped pipes ("\|") the pattern would only match the
	 * literal text "Week|Lecture|Chapter" and nothing would ever be stripped.
	 *
	 * @param string $name Section or lecture name.
	 * @return string The shortened name, or $name itself when stripping
	 *                would leave nothing.
	 */
	function drop_deco($name)
	{
		$stripped = preg_replace('/^(Week|Lecture|Chapter)[\s]*\d+[.:\s\-]*/','',$name);

		// never hand back an empty name
		return ($stripped=='') ? $name : $stripped;
	}

	/**
	 * Reads the text of the first <h3> child of a group node.
	 *
	 * @param DOMXPath $xpath XPath evaluator bound to the page.
	 * @param DOMNode  $group Group node (see c_query_groups()).
	 * @return string The heading text after coursera_trim().
	 */
	function c_query_dir($xpath,$group)
	{
		$headings = $xpath->query('./h3',$group);
		$raw_title = $headings->item(0)->nodeValue;

		return coursera_trim($raw_title);
	}

	/**
	 * Builds a directory name of the form "NN. name".
	 *
	 * @param string $dir         Raw section name.
	 * @param int    $group_count Number to prefix, zero-padded to two digits.
	 * @param bool   $drop_deco   When true the name is passed through drop_deco() first.
	 * @return string The number, ". " and the name cleaned by fix_filename().
	 */
	function c_deco_dir($dir,$group_count,$drop_deco)
	{
		$label = $drop_deco ? drop_deco($dir) : $dir;
		$number = str_pad($group_count,2,'0',STR_PAD_LEFT);

		return $number.'. '.fix_filename($label);
	}

	/**
	 * Collects all <li> descendants of the node that directly follows a group node.
	 *
	 * @param DOMXPath $xpath XPath evaluator bound to the page.
	 * @param DOMNode  $group Group node; its next sibling is searched.
	 * @return DOMNodeList The <li> nodes found.
	 */
	function c_query_list($xpath,$group)
	{
		$container = $group->nextSibling;

		return $xpath->query('.//li',$container);
	}

	/**
	 * Finds the first "lecture-link" anchor below a list item and derives a
	 * title from the anchor's first child node.
	 *
	 * @param DOMXPath $xpath     XPath evaluator bound to the page.
	 * @param DOMNode  $node      List item to search in.
	 * @param bool     $drop_deco When true the title is passed through drop_deco().
	 * @param DOMNode  $row       Output: the anchor node.
	 * @param string   $title     Output: title cleaned by fix_filename().
	 */
	function c_query_row($xpath,$node,$drop_deco,&$row,&$title)
	{
		$anchors = $xpath->query('.//a[contains(@class,"lecture-link")]',$node);
		$row = $anchors->item(0);

		$cleaned = fix_filename($row->firstChild->nodeValue);
		$title = $drop_deco ? drop_deco($cleaned) : $cleaned;
	}


	/**
	 * Loads the page named by the row's "data-modal-iframe" attribute and
	 * looks up the <source> elements of the given video type in it.
	 *
	 * @param DOMNode     $row     Lecture anchor node.
	 * @param string      $ext     Video subtype, matched as type="video/<ext>".
	 * @param string|null $session Session value forwarded to get_page_xpath().
	 * @return DOMNodeList|null The matching nodes (possibly none), or NULL
	 *                          when the page could not be loaded.
	 */
	function c_get_embedded_links($row,$ext,$session = NULL)
	{
		$frame_url = trim($row->attributes->getNamedItem('data-modal-iframe')->nodeValue);

		// lectures in preview mode live on external pages, so they are fetched separately
		$player_page = get_page_xpath($frame_url,$session);
		if (!$player_page)
			return NULL;

		$source_filter = 'source[@type="video/'.$ext.'"]';

		$sources = $player_page->query('//video[@id="QL_video_element_first"]/'.$source_filter);
		if ($sources->length===0)
			$sources = $player_page->query('//div[@id="QL_player_container_first"]//'.$source_filter);

		return $sources;
	}

	/*function c_get_embedded_links2($row,$ext,$session = NULL)
	{
	$frame = trim($row->attributes->getNamedItem('data-modal-iframe')->nodeValue);
	// lectures in preview mode are put at external pages, so we have to download them extra
	$view = get_page_xpath($frame,$session);

	if (!$view)
	return NULL;
	else
	return $view->query('//div[@id="QL_player_container_first"]//embed[@id="me_flash_0" and ends-with(@flashvars,".'.$ext.'")]');
	}*/

	/**
	 * Trims a UTF-8 string after turning every no-break space into a plain
	 * space (trim() alone does not remove no-break spaces).
	 *
	 * Only the complete two-byte sequence C2 A0 is replaced. Replacing
	 * single bytes would damage every other UTF-8 character that happens
	 * to contain one of them (for instance "à" = C3 A0).
	 *
	 * @param string $s UTF-8 text.
	 * @return string The text with no-break spaces converted and the ends trimmed.
	 */
	function coursera_trim($s)
	{
		return trim(str_replace("\xc2\xa0",' ',$s));
	}

	// ---- general php code -----------------------------------------------

	/**
	 * Writes the lines of the generated download shell script to stdout.
	 *
	 * Directory creation is deferred: mkdir_print() only queues its "mkdir"
	 * lines and the next wget_file_print() call writes them out, so a
	 * directory is not created when nothing is downloaded into it.
	 *
	 * Every value taken from the outside (file names, URLs, session, log
	 * name) is emitted as a single-quoted shell word, so characters such as
	 * $, ` or " are never interpreted by the shell.
	 */
	class BashPrinter
	{
		// "mkdir" lines queued by mkdir_print() and not yet written out
		private $dirLines = array();

		/**
		 * Quotes a value as one literal word for a POSIX shell.
		 *
		 * Hand-rolled on purpose: it only touches the single-quote byte, so
		 * multi-byte file names pass through unchanged.
		 *
		 * @param string $value Raw value.
		 * @return string The value in single quotes, embedded quotes escaped.
		 */
		private static function sh_quote($value)
		{
			return "'".str_replace("'","'\\''",$value)."'";
		}

		/**
		 * Prints the commands that download one file unless it already exists.
		 *
		 * On failure the partial file is removed and the script's ERRORS
		 * counter is incremented; on success the link is appended to $log
		 * when a log file name is given.
		 *
		 * @param string      $link            URL to download.
		 * @param string      $target_filename Path of the file to create.
		 * @param string|null $log             Log file for finished links, or NULL for none.
		 * @param string|null $session         Session cookie value, or NULL for none.
		 */
		public function wget_file_print($link,$target_filename,$log = NULL,$session = NULL)
		{
			// flush the directory commands queued for this section
			foreach ($this->dirLines as $line)
				echo $line;
			$this->dirLines = array();

			$q_link = self::sh_quote($link);
			$q_target = self::sh_quote($target_filename);

			echo 'if [ ! -e '.$q_target.' ] ; then'."\n";
			// "$@" hands the script's own arguments to wget, each one intact
			echo '  wget "$@" -nc --no-cookies ';
			if ($session!==NULL)
				echo ' --header '.self::sh_quote('Cookie: session='.$session).' ';
			echo $q_link.' -O '.$q_target."\n";
			echo '  if [ $? -ne 0 ]'."\n";
			echo '  then'."\n";
			echo '    rm -f '.$q_target.'; ERRORS=$((ERRORS+1))'."\n";
			if ($log!==NULL)
			{
				echo '  else'."\n";
				// printf writes the link verbatim; echo may interpret backslashes
				echo '    printf \'%s\\n\' '.$q_link.' >> '.self::sh_quote($log)."\n";
			}
			echo '  fi'."\n";
			echo 'fi'."\n";
		}

		/**
		 * Queues the commands creating the directory (or, with the split
		 * option, one directory per split name) for the next download.
		 *
		 * "mkdir -p" is used in both cases so that rerunning the script
		 * does not complain about directories that already exist.
		 *
		 * @param string $dir    Section directory name.
		 * @param array  $extras Options; the split entry must already be an array of names.
		 */
		public function mkdir_print($dir,$extras)
		{
			global $split_dirs_key;

			$this->dirLines = array("\n");

			if (array_key_exists($split_dirs_key,$extras))
			{
				foreach ($extras[$split_dirs_key] as $d)
					$this->dirLines[] = 'mkdir -p '.self::sh_quote($d.'/'.$dir)."\n";
			}
			else
				$this->dirLines[] = 'mkdir -p '.self::sh_quote($dir)."\n";
		}
	}

	/**
	 * Converts the value of the split-directories option, when present,
	 * from a space-separated string into an array of names.
	 *
	 * @param array $extras Options keyed by switch name; modified in place.
	 */
	function process_extra_arguments(&$extras)
	{
		global $split_dirs_key;

		if (!array_key_exists($split_dirs_key,$extras))
			return;

		$dir_names = explode(' ',$extras[$split_dirs_key]);
		$extras[$split_dirs_key] = $dir_names;
	}

	/**
	 * Parses UTF-8 HTML into a DOMDocument, keeping libxml's parse
	 * warnings away from the output.
	 *
	 * @param string $content UTF-8 encoded HTML; may be empty.
	 * @return DOMDocument The parsed document (an empty one for empty input).
	 */
	function get_dom($content)
	{
		$dom = new DOMDocument();

		// Every non-ASCII character becomes a numeric entity, so the markup
		// handed to the parser is plain ASCII. This replaces the 'HTML-ENTITIES'
		// pseudo-encoding of mb_convert_encoding(), deprecated since PHP 8.2.
		$content = mb_encode_numericentity($content, array(0x80, 0x10FFFF, 0, 0x1FFFFF), 'UTF-8');

		$errors_mode = libxml_use_internal_errors(TRUE);
		// loadHTML() rejects an empty string (ValueError on PHP 8)
		if ($content!=='')
			$dom->loadHTML($content);
		libxml_clear_errors();
		libxml_use_internal_errors($errors_mode);
		// NOTE(review): assigned after loading, so it presumably has no effect
		// on how this document was parsed -- confirm before relying on it
		$dom->preserveWhiteSpace = false;

		return $dom;
	}

	/**
	 * Makes a title usable as a file name: "?" becomes a space, ":" a
	 * period, a double quote a single quote, "/" and "\" an underscore;
	 * the result goes through coursera_trim() and runs of whitespace are
	 * collapsed into one space.
	 *
	 * @param string $s Raw title.
	 * @return string The cleaned title.
	 */
	function fix_filename($s)
	{
		$mapped = strtr($s,'?:"/\\',' .\'__');
		$trimmed = coursera_trim($mapped);

		return preg_replace('/\s{2,}/', ' ',$trimmed);
	}

	/**
	 * Downloads a page with an HTTP GET and returns an XPath evaluator for it.
	 *
	 * The raw download result is also stored in the global $debug_page.
	 *
	 * @param string      $url     Address of the page.
	 * @param string|null $session When given, sent as "Cookie: session=<value>;".
	 * @return DOMXPath|null Evaluator for the parsed page, or NULL when the
	 *                       download failed.
	 */
	function get_page_xpath($url,$session = NULL)
	{
		global $debug_page;

		$http_options = array('method'=>'GET');
		if ($session!==NULL)
			$http_options['header'] = 'Cookie: session='.$session.';';

		// warnings (e.g. on a lost connection) are suppressed; failure is reported via NULL
		$page = @file_get_contents($url,false,stream_context_create(array('http'=> $http_options)));
		$debug_page = $page;

		return ($page===FALSE) ? NULL : new DOMXPath(get_dom($page));
	}



	?>
	<?php

	/* ===================== COURSERA GETTER ============================
	tags: [coursera video download] [coursera lecture download]

	CHANGELOG:
	---------

	2013-08-06 * additional info from tooltip is processed to be used
	as a filename

	2013-07-15 * if there is nothing to download for given folder
	it is not created

	2013-06-29 * updated extraction for embedded videos
	* added option to limit extraction from main page

	2013-05-15, 2 * corrected reporting failed download or extraction

	2013-05-15 * reporting failed extraction of main resource

	2013-05-03 * more unification with c_preview utility -- ability to
	download embedded videos as well (read: webm), add
	"~" character before file extension to make Coursera
	Getter fetch embedded video
	* "--extension" option is no longer supported -- add
	dot (".") before file extension instead

	2013-04-22 * dropping a week/lecture phrase from filenames as well

	2013-04-15 * refactoring

	2013-04-07 * keeping log of downloads (file "downloads.log") as
	countermeasure for renaming the lectures/notes
	* added "beep" option to make a sound at the end of
	downloading

	2013-04-03 * bugfix: the title of lecture sometimes was ignored

	2013-02-15 * more accurate whitespace removal

	2013-02-02 * automatically removes corrupted files

	2013-01-23 * new option "reverse" for the courses which put
	sections in "from newest to oldest" order

	2013-01-08 * Coursera changed its web format, along with structure
	and CSS tags/classes this version hopefully is
	changed to reflect all of those

	2012-10-12 * added option --drop_week to drop "week X." part from
	the directory (remind me this was supposed to be dead
	simple tool ;-D)

	2012-10-11 * added option --split_dirs to save files into
	subdirectories according to file extensions

	2012-10-03 * added option --extension to get resources by
	extension of the files, not the tooltips

	2012-09-18 * added c_common.php
	* you can specify via filetypes what to grab and what
	extension set

	2012-07-05 * initial release of Coursera preview getter

	2012-06-14 * added little control if there is insufficient number
	of arguments

	2012-06-11 * UTF-8 in filenames are supported
	(another module for PHP is required -- mbstring)
	* replaces slash and backslash with underscore

	2012-06-07 * changed this info, added another way of getting
	cookies

	2012-06-06, 2 * extensions casing reverted -- they matter again
	* directories are named according to Lectures sections
	* handles multiple files for given file type
	* files are counted within each directory, not within
	entire course

	2012-06-06 * extensions can be given lower/upper-case, they do not
	matter

	2012-06-05, 2 * creates weekly subdirectories and puts the files in
	there

	2012-06-05 * initial release

	WHAT IT DOES:
	------------

	* it parses given course Lectures page
	* it extracts all the desired content (links for videos, slides, etc)
	* it uses consistent naming of the files
	* it replaces colon with period (hello Windows users)
	* it finally creates a bunch of wget command ready to execute
	* it ignores already existing files, so it is safe to rerun wget
	script just to get missing files (note this might be not true if you
	update this script, because of possible change in naming convention)

	WHAT YOU NEED:
	-------------

	1. proper shell (Windows users -- of course I recommend switching to
	Linux entirely, but as a workaround Cygwin should be fine -- I
	don't know how about the tools I mention below)
	2. wget (in openSUSE `sudo zypper in wget`)
	3. php5 (in openSUSE `sudo zypper in php5`)
	4. php5-openssl (in openSUSE `sudo zypper in php5-openssl`)
	5. php5-mbstring (in openSUSE `sudo zypper in php5-mbstring`)
	6. and an adventurous soul -- in Firefox, go to
	Edit/Preferences/Privacy/Remove Individual Cookie (don't freak
	out!) search for "coursera". Several items should appear -- look
	for key session for the site you would like to download (for
	example "nlp"). Copy the value (content) of that key. Close the
	preferences window (do NOT delete anything!) -- I will be
	grateful for info if there is easier way

	Ok, so now you know the address of the site, the session, and the files
	you would like to download.

	Jan de Vos sent another way for getting cookies (step 6):

	* find the cookies directory -- in case of Linux it will be something
	like this `~/.mozilla/firefox/88xw1k8g.default/`
	* run sqlite3 -- `sqlite3 cookies.sqlite`
	* run SQL query -- `select path,value from moz_cookies where
	baseDomain = 'coursera.org' and name='session';`

	You will get the session codes for all courses you are enrolled on.

	USAGE:
	-----

	php c_get.php "link_to_lectures_page" "file types" "session code" > wget_script_name.sh
	sh wget_script_name.sh

	Example (this is one line):

	php c_get.php "https://class.coursera.org/crypto/lecture/index" "MP4 PDF" "HERE&IS%MY&SESSION^VALUE@WHICHOF!COURSEI_WONT*TELL9YOU" > wgetter.sh

	the one above creates appropriate script for wget for downloading videos
	(MP4) and slides (PDF).

	Please note the file type casing (MP4 vs. mp4) must match the casing of
	the title (tooltip) of given category of files -- check the Lectures
	page to find it out.

	It is possible to pass file type in format "FileFormat=FileExtension",
	so this script will look for one thing, but save as another. For example, some courses list pdf files as "Slides". In such a case pass the file type "Slides=pdf" -- this means "Slides" will be grabbed, but saved with the extension "pdf".

	Some courses do not use consistent naming of tooltips (unfortunately),
	in such case you can download files directly by extension -- add dot
	(".") character in front of file type. As previously, pay attention to
	lowercase/uppercase (e.g. usually the extension is "mp4" but tooltip is
	"MP4"). Example:

	php c_get.php "https://class.coursera.org/scala/lecture/index" ".mp4 .pdf" "HERE&IS%MY&SESSION" > wgetter.sh

	Yet another source of files are embedded frames (the ones when you click
	to view lecture online). One of the advantages of this is ability to
	download video in webm format. Instead of "." use now "~", for example:

	php c_get.php "https://class.coursera.org/scala/lecture/index" "~webm .pdf" "HERE&IS%MY&SESSION" > wgetter.sh

	NOTE: the video will be downloaded from embedded player, but handouts
	(pdf) will be downloaded from download (resources) section.

	If you would like to have notes in the "notes" subdirectory and lectures
	in "lectures" one add "--split_dirs" argument in such way:

	php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --split_dirs="videos texts" > wgetter.sh

	so "mp4" files will go into "videos" subdirectory and "pdf" files into
	"texts" subdirectory.

	If the directories with opening "Week X." seem redundant add
	"--drop_week" option:

	php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --drop_week > wgetter.sh

	Instead of having "02. Week 1: Functions & Evaluations" you will get
	"02. Functions & Evaluations".

	For courses which do not use natural order (from oldest to newest) there
	is an option "reverse":

	php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --reverse > wgetter.sh

	This will tell this script to use reversed order of numbering sections.

	The courses with embedded videos are harder to process -- extraction
	takes more time. If you know in advance that you don't want to extract
	some portion of the lectures you can pass the limit option:

	php c_get.php "https://class.coursera.org/scala/lecture/index" "mp4 pdf" "HERE&IS%MY&SESSION" --limit="Week 9" > wgetter.sh

	This will start extraction from section containing phrase "Week 9". In
	case of reversed order -- it will stop extraction on phrase "Week 9".

	In all above examples, video lecture (mp4/webm) came first -- the
	program assumes it is the main resource, and if it is missing it will
	report this fact. It won't report missing resource of any other kind.

	Once the actual getter script is created (here: wgetter.sh) you can pass
	any extra option for "wget". For example you can run it as:

	sh wgetter.sh --limit-rate=100k

	This would limit speed of download to 100KB/s. See "man wget" for more
	options.
	*
	SECURITY NOTE:
	-------------

	Do NOT share your session code with anyone, and this means -- do NOT
	share the wget script with anyone as well!

	================================================================== */

	require_once 'c_common.php';

	/**
	 * Prints to stdout a shell script that downloads the course resources.
	 *
	 * For every lecture group of the page a directory is scheduled via
	 * BashPrinter::mkdir_print(), and for every lecture and every requested
	 * file type the matching links are handed to BashPrinter::wget_file_print().
	 * Links already listed in "downloads.log" are skipped. Diagnostics go to
	 * stderr so they never end up in the generated script.
	 *
	 * A file type is either "Type" or "Type=Extension"; "Type" may start with
	 * "~" (take the link from the embedded viewer frame) or "." (match the
	 * link by the extension of the linked file); otherwise the link tooltip
	 * (title attribute) is matched.
	 *
	 * @param DOMXPath $xpath      XPath object of the lectures page
	 * @param string   $session    session code passed on to the downloads
	 * @param array    $extensions requested file types
	 * @param array    $extras     extra options (option name => value or NULL)
	 */
	function print_wget($xpath,$session,$extensions,$extras)
	{
		global $split_dirs_key,$drop_deco_key,$reverse_key,$beep_key,$debug_key,$limit_key;
		global $debug_page;

		$bash_printer = new BashPrinter();

		process_extra_arguments($extras);

		$split_dirs = array_key_exists($split_dirs_key,$extras);
		$drop_deco = array_key_exists($drop_deco_key,$extras);
		$reverse = array_key_exists($reverse_key,$extras);
		$debug = array_key_exists($debug_key,$extras);
		$has_limit = array_key_exists($limit_key,$extras);

		$ext_total = count($extensions);

		// each file type is paired with the split directory at the same index;
		// a missing directory would silently produce a target path starting
		// with "/", so refuse to generate anything in that case
		if ($split_dirs && count($extras[$split_dirs_key])<$ext_total)
		{
			file_put_contents('php://stderr', "Error: $split_dirs_key needs one directory per file type\n");
			return;
		}

		// done with extra arguments ---------------------------------------

		$downloads_filename = 'downloads.log';

		// links fetched by previous runs, one per line
		$downloads = array();
		if (file_exists($downloads_filename))
		{
			$downloads = file($downloads_filename, FILE_IGNORE_NEW_LINES | FILE_SKIP_EMPTY_LINES);
			if ($downloads===FALSE) // unreadable log: behave as if it was empty
				$downloads = array();
		}

		echo "ERRORS=0\n";

		$group_list = c_query_groups($xpath);

		// groups are numbered upwards, or downwards in reverse mode
		$group_count = $reverse ? $group_list->length : 1;

		if ($reverse)
			// in reverse order pretend limit was NOT hit
			$limit_hit = false;
		else
			// if there is no limit given by user, pretend it was hit
			$limit_hit = !$has_limit;

		foreach ($group_list as $group)
		{
			$item_count = 0;

			$dir = c_query_dir($xpath,$group);

			if (!$limit_hit && $has_limit)
				$limit_hit = (strpos($dir,$extras[$limit_key])!==FALSE);

			$dir = c_deco_dir($dir,$group_count,$drop_deco);
			$group_count += $reverse ? -1 : +1;

			if ($has_limit)
			{
				if ($reverse)
				{
					// reverse order: stop at the section matching the limit
					if ($limit_hit)
						break;
				}
				elseif (!$limit_hit)
					// natural order: skip sections until the limit is reached
					continue;
			}

			$bash_printer->mkdir_print($dir,$extras);

			$node_list = c_query_list($xpath,$group);

			foreach ($node_list as $node)
			{
				++$item_count;

				c_query_row($xpath,$node,$drop_deco,$row,$title);

				// each file type might be in such forms: either "FileType" or "FileType=FileExtension" (e.g. "PDF", "Slides=pdf")
				for ($i_ext = 0; $i_ext < $ext_total; ++$i_ext)
				{
					$ext_parts = explode('=',$extensions[$i_ext]);

					// an empty type (e.g. from a doubled space) has no first
					// character to inspect and nothing to look for
					if ($ext_parts[0]==='')
						continue;

					if ($split_dirs)
						$target_dir = $extras[$split_dirs_key][$i_ext].'/'.$dir;
					else
						$target_dir = $dir;

					$attr_extractor = 'href';

					if ($ext_parts[0][0]=='~') // extract link by extension from viewer frame
					{
						$ext_parts[0] = substr($ext_parts[0],1);

						$links = c_get_embedded_links($row,$ext_parts[0],$session);

						if ($links===NULL)
						{
							file_put_contents('php://stderr', "Loading embedded frame failed: '$dir/$title'\n");
							continue;
						}

						// only the first file type is the main resource whose absence is reported
						if ($links->length===0 && $i_ext===0)
						{
							file_put_contents('php://stderr', "No resources '$ext_parts[0]' found for '$dir/$title'\n");
							if ($debug)
								file_put_contents('DEBUG_'.$title,$debug_page);
							continue;
						}

						$attr_extractor = 'src';

						if ($debug)
							file_put_contents('php://stderr', "For $dir/$title ".$links->length." '$ext_parts[0]' links found.\n");
					}
					else if ($ext_parts[0][0]=='.') // extract link by extension of the linked file
					{
						$links = $xpath->query('.//div[@class="course-lecture-item-resource"]/a[contains(@href,"'.$ext_parts[0].'")]',$node);
						$ext_parts[0] = substr($ext_parts[0],1);
					}
					else // extract link by tooltip of the link
						$links = $xpath->query('.//a[contains(@title,"'.$ext_parts[0].'")]',$node);

					// the saved extension is the last part, taken after the "~"/"." prefix was stripped
					$target_ext = strtolower($ext_parts[count($ext_parts)-1]);

					foreach ($links as $link_node)
					{
						// several files of one type: tell them apart by their tooltip
						$suffix = '';
						if ($links->length>1)
							$suffix = '.'.fix_filename($link_node->attributes->getNamedItem('title')->nodeValue);

						$link = $link_node->attributes->getNamedItem($attr_extractor)->nodeValue;

						if (!in_array($link,$downloads,true))
						{
							$target_filename = $target_dir.'/'.str_pad($item_count,3,'0',STR_PAD_LEFT).'. '.$title.$suffix.'.'.$target_ext;
							$bash_printer->wget_file_print($link,$target_filename,$downloads_filename,$session);
						}
						else if ($debug)
							file_put_contents('php://stderr', "$dir/$title '$ext_parts[0]' already downloaded.\n");
					}
				}

			}
		}

		echo "\n";
		echo 'if [ $ERRORS -ne 0 ] ; then echo "There were some errors while downloading. Run the script again." ; fi'."\n";

		if (array_key_exists($beep_key,$extras))
			echo "beep\n";
	}

	// ---- command-line entry point: parse arguments, load the page, print the script ----

	if ($argc<4)
	{
		file_put_contents('php://stderr', "Error: you should input minimum three arguments, the usage is:\n");
		// the *_key variables already contain the leading "--"
		file_put_contents('php://stderr', "\"LECTURES_URL\" \"FILE_TYPES\" \"SESSION_CODE\" [$beep_key] [$reverse_key] [$drop_deco_key] [$debug_key] [$limit_key=\"section phrase\"] [$split_dirs_key=\"directories per file type\"]\n");
		exit(1);
	}

	array_shift($argv); // drop the script name

	$url = array_shift($argv);
	// file types are separated by spaces; repeated spaces must not yield empty types
	$extensions = preg_split('/ +/',array_shift($argv),-1,PREG_SPLIT_NO_EMPTY);
	$session = array_shift($argv);

	$known_keys = array($split_dirs_key,$drop_deco_key,$reverse_key,$beep_key,$debug_key,$limit_key);

	$extras = array();
	foreach ($argv as $a)
	{
		// split on the first "=" only, so the value itself may contain "="
		$parts = explode('=',$a,2);
		if (!in_array($parts[0],$known_keys,true))
		{
			file_put_contents('php://stderr', 'Unknown extra argument "'.$parts[0]."\"\n");
			exit(1);
		}

		// options given without a value are stored as NULL
		$extras[$parts[0]] = count($parts)==1 ? NULL : $parts[1];
	}

	$xpath = get_page_xpath($url,$session);
	if ($xpath===NULL)
	{
		// report the failure on stderr instead of silently printing nothing
		file_put_contents('php://stderr', "Error: could not load \"$url\"\n");
		exit(1);
	}

	print_wget($xpath,$session,$extensions,$extras);

	?>
	<?php

	/* ===================== COURSERA PREVIEW GETTER =======================
	tags: [coursera video download] [coursera lecture download]

	CHANGELOG:
	---------

	2013-07-15 * if there is nothing to download for given folder
	it is not created

	2013-06-29 * technical change to work with c_common

	2013-05-15 * reporting failed download or extraction of main
	resource

	2013-05-03 * just keeping in sync with c_get

	2013-04-22 * dropping a week/lecture phrase from filenames as well

	2013-04-15 * update to follow last Coursera changes

	2012-10-12 * sharing code with c_common.php;
	some fixes to follow Coursera changes of preview sites

	2012-07-05 * initial release

	WHAT IT DOES:
	------------

	* it is counterpart for Coursera getter, but this one works only for course previews
	-- the ones with embedded video player, and nothing else

	WHAT YOU NEED:
	-------------

	1. proper shell (Windows users -- of course I recommend switching to Linux entirely, but as a workaround Cygwin should be fine -- I don't know how about the tools I mention below)
	2. wget (in openSUSE `sudo zypper in wget`)
	3. php5 (in openSUSE `sudo zypper in php5`)
	4. php5-mbstring (in openSUSE `sudo zypper in php5-mbstring`)

	USAGE:
	-----

	php c_preview.php "link_to_preview_page" "video_file_type" > wget_script_name.sh
	sh wget_script_name.sh

	Example (this is one line):

	php c_preview.php "https://class.coursera.org/crypto-preview/lecture/index" "mp4" > wgetter.sh

	the one above creates appropriate script for wget for downloading videos (MP4). Now execute

	sh wgetter.sh

	Please note the file type is not guaranteed to exist on the server
	(so far "webm" and "mp4" are supported by Coursera).

	================================================================== */

	require_once 'c_common.php';

	// https://class.coursera.org/machlearning-001/lecture/preview/index
	/**
	 * Prints to stdout a shell script that downloads the preview videos.
	 *
	 * For every lecture group of the page a directory is scheduled via
	 * BashPrinter::mkdir_print(); for every lecture the first embedded video
	 * of the requested type is handed to BashPrinter::wget_file_print().
	 * Problems are reported on stderr so they never end up in the script.
	 *
	 * @param DOMXPath $xpath  XPath object of the preview lectures page
	 * @param string   $ext    video file type to look for (e.g. "mp4")
	 * @param array    $extras extra options (option name => value or NULL)
	 */
	function print_wget($xpath,$ext,$extras)
	{
		global $split_dirs_key,$drop_deco_key,$reverse_key,$beep_key;

		$bash_printer = new BashPrinter();

		process_extra_arguments($extras);

		$drop_deco = array_key_exists($drop_deco_key,$extras);
		$reverse = array_key_exists($reverse_key,$extras);

		echo "ERRORS=0\n";

		$group_list = c_query_groups($xpath);

		// groups are numbered upwards, or downwards in reverse mode
		$group_count = $reverse ? $group_list->length : 1;

		foreach ($group_list as $group)
		{
			$item_count = 0;

			$dir = c_deco_dir(c_query_dir($xpath,$group),$group_count,$drop_deco);
			$group_count += $reverse ? -1 : +1;

			$bash_printer->mkdir_print($dir,$extras);

			// with split directories only "<split dir>/<group dir>" gets created,
			// so the file has to be saved there; there is a single file type,
			// hence the first split directory is the one to use
			if (array_key_exists($split_dirs_key,$extras))
				$target_dir = $extras[$split_dirs_key][0].'/'.$dir;
			else
				$target_dir = $dir;

			// get the list of all lectures within current group (week)
			$node_list = c_query_list($xpath,$group);

			foreach ($node_list as $node)
			{
				++$item_count;

				c_query_row($xpath,$node,$drop_deco,$row,$title);

				$video_list = c_get_embedded_links($row,$ext);

				if ($video_list===NULL)
				{
					file_put_contents('php://stderr', "Loading embedded frame failed: '$dir/$title'\n");
					continue;
				}
				else if ($video_list->length==0)
				{
					file_put_contents('php://stderr', "Filetype '$ext' not found for '".$title."'\n");
					continue;
				}

				// only the first matching video is downloaded
				$video = $video_list->item(0);
				$vid_src = $video->attributes->getNamedItem('src')->nodeValue;

				$bash_printer->wget_file_print($vid_src,$target_dir.'/'.str_pad($item_count,3,'0',STR_PAD_LEFT).'. '.$title.'.'.strtolower($ext));

			}
		}

		echo 'if [ $ERRORS -ne 0 ] ; then echo "There were some errors while downloading. Run the script again." ; fi'."\n";

		if (array_key_exists($beep_key,$extras))
			echo "beep\n";
	}

	// ---- command-line entry point: parse arguments, load the page, print the script ----

	if ($argc<3)
	{
		file_put_contents('php://stderr', "Error: you should input minimum two arguments, the usage is:\n");
		// the *_key variables already contain the leading "--"
		file_put_contents('php://stderr', "\"LECTURES_URL\" \"FILE_TYPES\" [$beep_key] [$reverse_key] [$drop_deco_key] [$split_dirs_key=\"directories per file type\"]\n");
		exit(1);
	}

	array_shift($argv); // drop the script name

	$url = array_shift($argv);
	$extensions = array_shift($argv);

	$known_keys = array($split_dirs_key,$drop_deco_key,$reverse_key,$beep_key);

	$extras = array();
	foreach ($argv as $a)
	{
		// split on the first "=" only, so the value itself may contain "="
		$parts = explode('=',$a,2);
		if (!in_array($parts[0],$known_keys,true))
		{
			file_put_contents('php://stderr', 'Unknown extra argument "'.$parts[0]."\"\n");
			exit(1);
		}

		// options given without a value are stored as NULL
		$extras[$parts[0]] = count($parts)==1 ? NULL : $parts[1];
	}

	$xpath = get_page_xpath($url);
	if ($xpath===NULL)
	{
		// report the failure on stderr instead of silently printing nothing
		file_put_contents('php://stderr', "Error: could not load \"$url\"\n");
		exit(1);
	}

	print_wget($xpath,$extensions,$extras);

	?>