Last active
November 22, 2023 16:03
-
-
Save AmirAref/7b1396b5ce4fcb321556a60f01025bfe to your computer and use it in GitHub Desktop.
Get information about YouTube videos from their links
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import html

from pandas import DataFrame
from yt_dlp import YoutubeDL
def extract_info(urls: list) -> list:
    """Collect basic metadata for videos without downloading them.

    Args:
        urls: The video URLs to look up. A single URL passed as a plain
            string is accepted too and treated as a one-element list.

    Returns:
        A list with one dict per URL for which the extractor returned data.
        Each dict has the keys ``title``, ``description``,
        ``duration_string`` and ``original_url``; a key the extractor did
        not report is present with the value ``None``. URLs for which the
        extractor returned nothing are skipped.
    """
    fields = ('title', 'description', 'duration_string', 'original_url')

    # Accept a bare string so callers can pass one URL directly.
    if isinstance(urls, str):
        urls = [urls]
    urls = list(urls)

    # Nothing to look up: skip setting up the downloader altogether.
    if not urls:
        return []

    data = []
    with YoutubeDL({'ignoreerrors': True}) as ytdl:
        for url in urls:
            info_dict = ytdl.extract_info(url, download=False)
            # An empty result means there is nothing to record for this URL.
            if not info_dict:
                continue
            # .get() keeps one missing field from aborting the whole batch.
            data.append({key: info_dict.get(key) for key in fields})
    return data
def _html_cell(value):
    """Return *value* made safe for a table cell written with escape=False."""
    if not isinstance(value, str):
        return value
    # Escape first so the only markup in the cell is the <br> added here.
    return html.escape(value).replace('\n', '<br>')


def main():
    """Read URLs from ``urls.txt`` and export their info as JSON and HTML.

    Reads one URL per line from ``urls.txt`` (blank and whitespace-only
    lines are ignored), looks each one up with ``extract_info``, and writes
    the result to ``data.json`` and ``data.html`` in the working directory.
    """
    # load urls from a file, dropping surrounding whitespace (incl. '\r')
    with open('urls.txt', 'r', encoding='utf-8') as file:
        urls = [line.strip() for line in file]
    # ignore blank lines
    urls = [url for url in urls if url]

    # extract data from urls
    data = extract_info(urls)

    # output: JSON keeps the raw values
    DataFrame(data).to_json('data.json')

    # output: HTML. Titles and descriptions are untrusted text, so they are
    # escaped per cell; escape=False is kept so the inserted <br> renders.
    html_rows = [
        {key: _html_cell(value) for key, value in row.items()}
        for row in data
    ]
    DataFrame(html_rows).to_html('data.html', escape=False, render_links=True)
# Run the export only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment