eashish93 · February 23, 2017 17:21
diff --git a/googledrive.py b/googledrive.py
 from .common import InfoExtractor
 from ..utils import RegexNotFoundError

 class GoogleDriveIE(InfoExtractor):
 	"""Extract video formats from a Google Drive / Google Docs file page.

 	URLs of the forms accepted by ``_VALID_URL`` (``video.google.com/get_player``
 	with a ``docid`` parameter, and ``docs``/``drive.google.com`` ``uc?...id=``
 	or ``file/d/`` links) are reduced to a file id; the metadata is then
 	scraped from the ``docs.google.com/file/d/<id>`` page.
 	"""

 	_VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
 	# Container extension for each format id found in the page's "fmt_stream_map".
 	_formats = {
 		'5': {'ext': 'flv'},
 		'6': {'ext': 'flv'},
 		'13': {'ext': '3gp'},
 		'17': {'ext': '3gp'},
 		'18': {'ext': 'mp4'},
 		'22': {'ext': 'mp4'},
 		'34': {'ext': 'flv'},
 		'35': {'ext': 'flv'},
 		'36': {'ext': '3gp'},
 		'37': {'ext': 'mp4'},
 		'38': {'ext': 'mp4'},
 		'43': {'ext': 'webm'},
 		'44': {'ext': 'webm'},
 		'45': {'ext': 'webm'},
 		'46': {'ext': 'webm'},
 		'59': {'ext': 'mp4'},
 	}

 	def _real_extract(self, url):
 		"""Return the info dict for the video referenced by *url*.

 		The page is searched for the ``"title"``, ``"fmt_stream_map"``,
 		``"fmt_list"`` and ``"length_seconds"`` values. When one of them
 		cannot be found, the page's ``"reason"`` value is reported as a
 		warning and ``None`` is returned instead.

 		Returns:
 			A dict with ``id``, ``title``, ``duration`` (int) and ``formats``,
 			or ``None`` after a warning when the metadata is missing.
 		"""
 		video_id = self._match_id(url)
 		webpage = self._download_webpage(
 			'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
 		)

 		def page_field(name):
 			# Every value is embedded in the page as: "<name>","<value>"
 			return self._html_search_regex(
 				r'"%s","(?P<%s>.*?)"' % (name, name), webpage, name, group=name)

 		try:
 			title = page_field('title')
 			fmt_stream_map = page_field('fmt_stream_map')
 			fmt_list = page_field('fmt_list')
 			length_seconds = page_field('length_seconds')
 		except RegexNotFoundError:
 			# Best effort: surface the page's own explanation and give up
 			# without raising (a missing "reason" still propagates).
 			self.report_warning(page_field('reason'))
 			return

 		formats = []
 		# Entries of both lists are paired by position; zip() ignores surplus
 		# entries of the longer list instead of failing with IndexError.
 		for stream, fmt_desc in zip(fmt_stream_map.split(','), fmt_list.split(',')):
 			# Only the first '|' separates the format id from its URL.
 			fmt_id, fmt_url = stream.split('|', 1)
 			resolution = fmt_desc.split('/')[1]
 			width, height = resolution.split('x')
 			fmt = {
 				'url': fmt_url,
 				'format_id': fmt_id,
 				'resolution': resolution,
 				'width': int(width),
 				'height': int(height),
 			}
 			# An unknown format id simply gets no 'ext' rather than aborting
 			# the whole extraction with KeyError.
 			if fmt_id in self._formats:
 				fmt['ext'] = self._formats[fmt_id]['ext']
 			formats.append(fmt)
 		self._sort_formats(formats)

 		# NOTE: "timestamp" extraction was disabled in the original code and
 		# is intentionally not part of the result.
 		return {
 			'id': video_id,
 			'title': title,
 			'duration': int(length_seconds),
 			'formats': formats,
 		}
	from .common import InfoExtractor
	from ..utils import RegexNotFoundError

	class GoogleDriveIE(InfoExtractor):
		"""Extract video formats from a Google Drive / Google Docs file page.

		URLs of the forms accepted by ``_VALID_URL`` (``video.google.com/get_player``
		with a ``docid`` parameter, and ``docs``/``drive.google.com`` ``uc?...id=``
		or ``file/d/`` links) are reduced to a file id; the metadata is then
		scraped from the ``docs.google.com/file/d/<id>`` page.
		"""

		# Alternation bars and lazy ".*?" quantifiers restored: the escaped
		# "\|" and bare ".?" forms would only match literal pipes.
		_VALID_URL = r'(?:https?://)?(?:video\.google\.com/get_player\?.*?docid=|(?:docs|drive)\.google\.com/(?:uc\?.*?id=|file/d/))(?P<id>.+?)(?:&|/|$)'
		# Container extension for each format id found in the page's "fmt_stream_map".
		_formats = {
			'5': {'ext': 'flv'},
			'6': {'ext': 'flv'},
			'13': {'ext': '3gp'},
			'17': {'ext': '3gp'},
			'18': {'ext': 'mp4'},
			'22': {'ext': 'mp4'},
			'34': {'ext': 'flv'},
			'35': {'ext': 'flv'},
			'36': {'ext': '3gp'},
			'37': {'ext': 'mp4'},
			'38': {'ext': 'mp4'},
			'43': {'ext': 'webm'},
			'44': {'ext': 'webm'},
			'45': {'ext': 'webm'},
			'46': {'ext': 'webm'},
			'59': {'ext': 'mp4'},
		}

		def _real_extract(self, url):
			"""Return the info dict for the video referenced by *url*.

			The page is searched for the ``"title"``, ``"fmt_stream_map"``,
			``"fmt_list"`` and ``"length_seconds"`` values. When one of them
			cannot be found, the page's ``"reason"`` value is reported as a
			warning and ``None`` is returned instead.

			Returns:
				A dict with ``id``, ``title``, ``duration`` (int) and ``formats``,
				or ``None`` after a warning when the metadata is missing.
			"""
			video_id = self._match_id(url)
			webpage = self._download_webpage(
				'http://docs.google.com/file/d/' + video_id, video_id, encoding='unicode_escape'
			)

			def page_field(name):
				# Every value is embedded in the page as: "<name>","<value>"
				return self._html_search_regex(
					r'"%s","(?P<%s>.*?)"' % (name, name), webpage, name, group=name)

			try:
				title = page_field('title')
				fmt_stream_map = page_field('fmt_stream_map')
				fmt_list = page_field('fmt_list')
				length_seconds = page_field('length_seconds')
			except RegexNotFoundError:
				# Best effort: surface the page's own explanation and give up
				# without raising (a missing "reason" still propagates).
				self.report_warning(page_field('reason'))
				return

			formats = []
			# Entries of both lists are paired by position; zip() ignores surplus
			# entries of the longer list instead of failing with IndexError.
			for stream, fmt_desc in zip(fmt_stream_map.split(','), fmt_list.split(',')):
				# Split on a plain '|' (not the two-character '\|'), and only on
				# the first one, which separates the format id from its URL.
				fmt_id, fmt_url = stream.split('|', 1)
				resolution = fmt_desc.split('/')[1]
				width, height = resolution.split('x')
				fmt = {
					'url': fmt_url,
					'format_id': fmt_id,
					'resolution': resolution,
					'width': int(width),
					'height': int(height),
				}
				# An unknown format id simply gets no 'ext' rather than aborting
				# the whole extraction with KeyError.
				if fmt_id in self._formats:
					fmt['ext'] = self._formats[fmt_id]['ext']
				formats.append(fmt)
			self._sort_formats(formats)

			# NOTE: "timestamp" extraction was disabled in the original code and
			# is intentionally not part of the result.
			return {
				'id': video_id,
				'title': title,
				'duration': int(length_seconds),
				'formats': formats,
			}