Last active
August 24, 2021 00:09
-
-
Save randompast/7db4f3824615a24809b1e8522b75e55e to your computer and use it in GitHub Desktop.
Paper Title to TMP Video Link
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os  # imported here because API_KEY is defined above the file's main import block

# YouTube Data API v3 developer key. It is read from the YOUTUBE_API_KEY
# environment variable so a real key never has to be written into this file;
# when the variable is unset the original placeholder value is used.
API_KEY = os.environ.get('YOUTUBE_API_KEY', 'your_key_here')
# https://developers.google.com/youtube/v3/docs/playlistItems/list?hl=en&apix_params=%7B%22part%22%3A%5B%22snippet.publishedAt%22%2C%22snippet.title%22%2C%22snippet.description%22%5D%2C%22playlistId%22%3A%22UUbfYPyITQ-7l4upoX8nvctg%22%7D
# -*- coding: utf-8 -*-
# Sample Python code for youtube.playlistItems.list
# See instructions for running these code samples locally:
# https://developers.google.com/explorer-help/guides/code_samples#python
import os | |
import pickle | |
import re | |
import googleapiclient.discovery | |
def main():
    """Download every page of the playlist and pickle the raw API responses.

    Pages through ``youtube.playlistItems().list`` for playlist
    ``UUbfYPyITQ-7l4upoX8nvctg`` using the module-level ``API_KEY``, following
    ``nextPageToken`` until the API stops returning one. The list of response
    dicts (one per page) is written to ``output_all.p`` in the current
    working directory.
    """
    # Disable OAuthlib's HTTPS verification when running locally.
    # *DO NOT* leave this option enabled in production.
    os.environ["OAUTHLIB_INSECURE_TRANSPORT"] = "1"

    api_service_name = "youtube"
    api_version = "v3"
    DEVELOPER_KEY = API_KEY

    youtube = googleapiclient.discovery.build(
        api_service_name, api_version, developerKey=DEVELOPER_KEY)

    pageToken = ''
    data = []
    while True:
        request = youtube.playlistItems().list(
            part="snippet",
            playlistId="UUbfYPyITQ-7l4upoX8nvctg",
            # The API documents 0-50 as the acceptable range for maxResults,
            # so ask for the largest page it actually allows.
            maxResults=50,
            pageToken=pageToken
        )
        print('obtaining', pageToken)
        response = request.execute()
        data.append(response)
        if 'nextPageToken' not in response:
            break
        pageToken = response['nextPageToken']

    # Use a context manager so the file is flushed and closed deterministically.
    with open('output_all.p', 'wb') as out_file:
        pickle.dump(data, out_file)
def get_paper_info(d):
    """Extract the paper title(s) mentioned in a video description.

    Args:
        d: The description text to search.

    Returns:
        The titles found after the word ``paper``, joined with ``","``, or the
        string ``"missing"`` when nothing matches.
    """
    # Join wrapped lines so a title spanning 2+ lines still matches. A CRLF
    # pair counts as a single line break, so it yields one space, not two.
    text = re.sub(r'\r\n?|\n', ' ', d)

    # Preferred form: paper "Title"
    matches = re.findall(r'paper \"(.+?)\"', text)
    if not matches:
        # Fallback: no plain opening quote right after "paper "; take
        # everything up to the next double quote instead.
        matches = re.findall(r'paper (.+?)\"', text)

    # print([d, papers])
    papers = ",".join(matches)
    return papers if papers else "missing"
def print_page(p, acc):
    """Collect one record per playlist item of page ``p`` into ``acc['videos']``.

    Each appended record is a list of
    ``[publishedAt, title, short video URL, paper titles, description]``.
    Nothing is returned; ``acc`` is updated in place.
    """
    # default_url = 'https://www.youtube.com/watch?v='
    url_prefix = 'http://y2u.be/'
    for entry in p['items']:
        snippet = entry['snippet']
        published = snippet['publishedAt']
        title = snippet['title']
        description = snippet['description']
        papers = get_paper_info(description)
        link = url_prefix + snippet['resourceId']['videoId']
        acc['videos'].append([published, title, link, papers, description])
def print_output():
    """Print a Markdown-style listing of the videos cached in ``output_all.p``.

    First prints how many videos have no detected paper title, then for each
    video prints the publication date with a ``[title](url)`` link, a line
    with the paper title(s), and a blank separator line.
    """
    # NOTE: pickle.load can execute arbitrary code from the file it reads;
    # only load an 'output_all.p' that this script produced itself.
    with open('output_all.p', 'rb') as in_file:
        data = pickle.load(in_file)

    acc = {'videos': []}
    for page in data:
        print_page(page, acc)

    # Index 3 of each record holds the paper titles ("missing" when none).
    print(sum(1 for v in acc['videos'] if v[3] == 'missing'))
    for v in acc['videos']:
        # if v[3] == 'missing' :
        print(v[0][:10], '[{0}]({1})'.format(v[1], v[2]))
        print(' - {0}'.format(v[3]))
        # print( v[-1] )
        print()
if __name__ == "__main__":
    # Fetch from the API only when no cached download exists yet, instead of
    # toggling a commented-out main() call by hand. Delete 'output_all.p' to
    # force a fresh download.
    if not os.path.exists('output_all.p'):
        main()
    print_output()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment