Last active
August 11, 2025 09:23
-
-
Save henryjfry/8da2b90aa4a4ef09110625a56b2367c7 to your computer and use it in GitHub Desktop.
IMDb trailer lookup
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import json | |
| import requests | |
| import time | |
def get_imdb_videos(imdb_id, timeout=10):
    """Fetch every trailer entry listed for an IMDb title via IMDb's GraphQL API.

    The first request also retrieves the title text and plot; further pages
    are requested with the ``endCursor`` of the previous page for as long as
    the API reports ``hasNextPage``.

    Args:
        imdb_id: IMDb title id (e.g. ``'tt4532368'``), sent as ``$const``.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A list of video node dicts as returned by the API (``id``,
        ``contentType``, ``name``, ``runtime``, ``thumbnail``,
        ``primaryTitle``), each extended with ``plot``, ``total`` and
        ``item_title``. Empty when the API returns no title or no videos.

    Raises:
        requests.HTTPError: If any request returns an error status.
        requests.RequestException: On connection failure or timeout.
    """
    import re
    import time

    import requests

    API_URL = "https://graphql.prod.api.imdb.a2z.com/"
    HEADERS = {
        'Referer': 'https://www.imdb.com/',
        'Origin': 'https://www.imdb.com',
        'User-Agent': 'Mozilla/5.0'
    }

    def gqlmin(q):
        # Strip the 4-space indentation units used to lay out the queries below.
        return re.sub(' {4}', '', q)

    query_subpage = '''
    query TitleVideoGallerySubPage(
        $const: ID!,
        $first: Int!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            titleText { text }
            plot { plotText { plainText } }
            videoStrip(first: $first, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''

    query_pagination = '''
    query TitleVideoGalleryPagination(
        $const: ID!,
        $first: Int!,
        $after: ID!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''

    fragment = '''
    fragment VideoGalleryItems on VideoConnection {
        pageInfo {
            endCursor
            hasNextPage
        }
        total
        edges {
            node {
                id
                contentType { id }
                name { value }
                runtime { value }
                thumbnail { url }
                primaryTitle {
                    series {
                        displayableEpisodeNumber {
                            displayableSeason {
                                season
                            }
                        }
                        series {
                            titleText { text }
                        }
                    }
                }
            }
        }
    }
    '''

    base_variables = {
        "const": imdb_id,
        "first": 50,
        "filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
        "sort": {"by": "DATE", "order": "DESC"}
    }

    def fetch_title(operation_name, query, variables):
        # POST one GraphQL operation and return its `data.title` object.
        # GraphQL sends explicit nulls for missing objects (unknown id, errors),
        # so `or {}` is used rather than a `.get()` default.
        payload = {
            'operationName': operation_name,
            'query': gqlmin(query + fragment),
            'variables': variables
        }
        response = requests.post(API_URL, headers=HEADERS, json=payload, timeout=timeout)
        response.raise_for_status()
        body = response.json() or {}
        return (body.get('data') or {}).get('title') or {}

    def nodes_of(strip):
        # Keep only edges that actually carry a node dict.
        return [edge['node'] for edge in (strip.get('edges') or []) if edge and edge.get('node')]

    # First page
    title_data = fetch_title("TitleVideoGallerySubPage", query_subpage, base_variables)
    plot_text = ((title_data.get('plot') or {}).get('plotText') or {}).get('plainText') or ""
    item_title = (title_data.get('titleText') or {}).get('text') or ""
    video_data = title_data.get('videoStrip') or {}
    total_videos = video_data.get('total')
    videos = nodes_of(video_data)
    page_info = video_data.get('pageInfo') or {}
    cursor = page_info.get('endCursor')
    has_next = page_info.get('hasNextPage', False)

    # Pagination loop
    while has_next and cursor:
        # Build a fresh variables dict per page instead of mutating the shared one.
        page_variables = dict(base_variables, after=cursor)
        title_data = fetch_title("TitleVideoGalleryPagination", query_pagination, page_variables)
        video_data = title_data.get('videoStrip') or {}
        videos.extend(nodes_of(video_data))
        page_info = video_data.get('pageInfo') or {}
        next_cursor = page_info.get('endCursor')
        has_next = page_info.get('hasNextPage', False)
        # Small pause between page requests.
        time.sleep(0.3)
        if next_cursor == cursor:
            # A cursor that does not advance would otherwise loop forever.
            break
        cursor = next_cursor

    # Match old output: inject plot, total, and item_title
    for video in videos:
        video["plot"] = plot_text
        video["total"] = total_videos
        video["item_title"] = item_title
    return videos
def time_format(seconds: int) -> str:
    """Render a duration given in seconds as a short human-readable string.

    Only the units from the largest non-zero one downwards are shown, e.g.
    ``'01m 05s'`` or ``'01H 00m 00s'``. ``None`` and zero-length durations
    are rendered as ``'-'``. The value is passed through ``int()`` first, so
    numeric strings and floats are accepted.
    """
    if seconds is None:
        return '-'

    total = int(seconds)
    days, remainder = divmod(total, 3600 * 24)
    hours, remainder = divmod(remainder, 3600)
    minutes, secs = divmod(remainder, 60)

    if days > 0:
        return '{:02d}D {:02d}H {:02d}m {:02d}s'.format(days, hours, minutes, secs)
    if hours > 0:
        return '{:02d}H {:02d}m {:02d}s'.format(hours, minutes, secs)
    if minutes > 0:
        return '{:02d}m {:02d}s'.format(minutes, secs)
    if secs > 0:
        return '{:02d}s'.format(secs)
    return '-'
| import re | |
def extract_season_number(title):
    """Extract a season/series number from a trailer title.

    Looks for the word "Season" or "Series" (case-insensitive) followed by
    optional whitespace and digits, e.g. ``"Season 2 Trailer"`` -> ``2``.
    When several such pairs occur, the last one wins.

    Args:
        title: Trailer title; anything that is not a ``str`` yields ``None``.

    Returns:
        The season number as ``int``, or ``None`` when the title has no
        "Season"/"Series" word directly followed by a number.
    """
    if not isinstance(title, str):
        return None
    # Require digits after the keyword so that a later bare "series"/"season"
    # word (e.g. "Season 2 of the series") cannot hide an earlier number.
    numbers = re.findall(r"(?:Season|Series)\s*(\d+)", title, re.IGNORECASE)
    if not numbers:
        return None
    return int(numbers[-1])
_TRAILER_CONTENT_TYPE = 'amzn1.imdb.video.contenttype.trailer'
_THEATRICAL_WORDS = ('theatrical', 'full', 'final')


def _nested(mapping, *keys):
    """Return mapping[k1][k2]..., or None as soon as a level is missing or not a dict."""
    current = mapping
    for key in keys:
        if not isinstance(current, dict):
            return None
        current = current.get(key)
    return current


def _first(candidates, predicate):
    """Return the first candidate satisfying predicate, or None."""
    return next((c for c in candidates if predicate(c)), None)


def find_best_trailer(trailer_list, season_number=None):
    """Pick the most suitable trailer from a list of IMDb video nodes.

    Entries whose content type is not the IMDb trailer type are ignored. The
    remaining ones are considered longest-runtime first and classified by
    title: "theatrical" (contains theatrical/full/final), "official", and
    season-specific (season taken from the node's series data or parsed from
    the title).

    Selection:
      * If ``season_number`` is given and a trailer for that season exists,
        the first such trailer is chosen; otherwise the first trailer of the
        closest lower season, if any.
      * With no season match, the longest trailer is the starting choice and
        is replaced by a preferred trailer when one exists: the first
        theatrical one, else the first official one (non-teaser preferred),
        else one whose title equals the series title.

    Args:
        trailer_list: Video node dicts as produced by ``get_imdb_videos``.
        season_number: Season to find a trailer for, or ``None``.

    Returns:
        A dict with keys ``id``, ``vid_url``, ``season``, ``title``,
        ``theatrical``, ``official``, ``thumbnail``, ``runtime`` and ``time``,
        or ``None`` when the list is empty or holds no trailer entries.
    """
    if not trailer_list:
        return None

    # Longest first; entries without a runtime sort last instead of crashing.
    ordered = sorted(
        trailer_list,
        key=lambda t: _nested(t, 'runtime', 'value') or 0,
        reverse=True,
    )

    candidates = []
    seasons_seen = []
    official_flag = False
    theatrical_flag = False
    series_title = None

    for trailer in ordered:
        if _nested(trailer, 'contentType', 'id') != _TRAILER_CONTENT_TYPE:
            continue

        # Season from the structured series data; reset for every entry so a
        # value from a previous trailer can never leak into this one.
        raw_season = _nested(
            trailer, 'primaryTitle', 'series',
            'displayableEpisodeNumber', 'displayableSeason', 'season',
        )
        try:
            season = int(raw_season)
        except (TypeError, ValueError):
            season = None

        title = _nested(trailer, 'name', 'value')
        lowered = str(title).lower()

        entry_season = season
        if season:
            series_title = (
                _nested(trailer, 'primaryTitle', 'series', 'series', 'titleText', 'text')
                or series_title
            )
        else:
            # Fall back to a "Season N" / "Series N" mention in the title.
            season = extract_season_number(title)
            if season:
                entry_season = season

        # "final" also appears in titles such as "A Look At The Final Season",
        # which are season features rather than theatrical trailers, so any
        # title mentioning "season" is excluded.
        is_theatrical = (
            any(word in lowered for word in _THEATRICAL_WORDS)
            and 'season' not in lowered
        )
        if is_theatrical:
            theatrical_flag = True

        is_official = 'official' in lowered
        if is_official:
            # A season-specific "official" trailer does not count as the
            # title's official trailer. NOTE: it also clears the flag, so the
            # flag reflects the most recently seen "official" title.
            official_flag = not season
            is_official = not season

        if season and season not in seasons_seen:
            seasons_seen.append(season)

        runtime = _nested(trailer, 'runtime', 'value')
        candidates.append({
            'id': trailer.get('id'),
            'vid_url': 'https://www.imdb.com/video/%s/?ref_=ttvg_vi_1' % (str(trailer.get('id'))),
            'season': entry_season,
            'title': title,
            'theatrical': is_theatrical,
            'official': is_official,
            'thumbnail': _nested(trailer, 'thumbnail', 'url'),
            'runtime': runtime,
            'time': time_format(runtime),
        })

    if not candidates:
        # Nothing of trailer type: there is no "longest trailer" to fall back to.
        return None

    # Resolve which season (if any) the result should belong to: the requested
    # one when available, otherwise the closest lower season.
    target_season = None
    if season_number:
        if season_number in seasons_seen:
            target_season = season_number
        else:
            lower_seasons = [s for s in seasons_seen if s <= season_number]
            if lower_seasons:
                target_season = max(lower_seasons)

    if target_season is None:
        season_trailer = candidates[0]
    else:
        season_trailer = _first(candidates, lambda c: c['season'] == target_season)

    # Preferred non-season trailer: theatrical > official (non-teaser first) >
    # one named exactly like the series.
    preferred = None
    if theatrical_flag:
        preferred = _first(candidates, lambda c: c['theatrical'])
    elif official_flag:
        preferred = _first(
            candidates,
            lambda c: c['official'] and 'teaser' not in str(c['title']).lower(),
        )
        if preferred is None:
            preferred = _first(candidates, lambda c: c['official'])
    elif series_title:
        preferred = _first(candidates, lambda c: c['title'] == series_title)

    # The preferred trailer only replaces the choice when no season matched
    # (or, with an official trailer present, when the season lookup came up empty).
    if preferred and (target_season is None or (official_flag and season_trailer is None)):
        season_trailer = preferred

    return season_trailer
def extract_imdb_mp4_url(video_id):
    """Fetch an IMDb video page and return a playable URL for the video.

    The page embeds a JSON ``"playbackURLs"`` array; the first entry whose
    ``videoMimeType`` is ``'MP4'`` is preferred, otherwise the first entry
    that has a URL is used.

    Args:
        video_id: IMDb video id (e.g. ``'vi1020905497'``).

    Returns:
        A ``(url, entry)`` tuple where ``entry`` is the playback dict the URL
        came from, or ``(None, None)`` when the array holds no usable entry.

    Raises:
        Exception: If the page request does not return HTTP 200.
        ValueError: If the page has no parseable ``playbackURLs`` array.
        requests.RequestException: On connection failure or timeout.
    """
    url = f"https://www.imdb.com/video/{video_id}?ref_=ttvg_vi_26"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch page: {response.status_code}")
    html = response.text

    marker = '"playbackURLs":['
    marker_pos = html.find(marker)
    if marker_pos == -1:
        raise ValueError(f"No playbackURLs found on page for video {video_id}")

    # SECURITY: this text comes from a remote page, so it is parsed as JSON
    # rather than passed to eval(); eval() would execute arbitrary code and
    # also fails on JSON literals such as true/false/null.
    # raw_decode() reads exactly one JSON value starting at the '[' and
    # ignores the rest of the page.
    array_start = marker_pos + len(marker) - 1
    try:
        playback_urls, _ = json.JSONDecoder().raw_decode(html, array_start)
    except json.JSONDecodeError as err:
        raise ValueError(f"Could not parse playbackURLs for video {video_id}") from err

    url = None
    video = None
    for entry in playback_urls:
        if entry.get('videoMimeType') == 'MP4':
            return entry['url'], entry
        if not url:
            # Remember the first non-MP4 stream as a fallback.
            url = entry.get('url')
            video = entry
    return url, video
# Demo / smoke test. Runs at import time and performs live requests against
# IMDb: fetch all trailers for one hard-coded title, pick the best one, and
# resolve its playable URL.
all_videos = get_imdb_videos(imdb_id='tt4532368')
#print(all_videos)
best_trailer = find_best_trailer(all_videos, season_number=None)
if best_trailer:
    print(best_trailer)
    print(best_trailer['title'])
    video_url, video = extract_imdb_mp4_url(best_trailer['id'])
    print("MP4 URL:", video_url)
    print(video)
# NOTE(review): the source's indentation was lost; exit() is assumed to sit at
# top level, which makes everything below unreachable debug output. If it
# belongs inside the `if` above, the dump below runs when no trailer is
# found - confirm against the original file.
exit()
#print(json.dumps(all_videos[:3], indent=2)) # Show first 3 videos
print(f"Total videos fetched: {len(all_videos)}")
print(all_videos)
# List every trailer whose name mentions a season, with its formatted runtime.
for i in all_videos:
    if 'contenttype.trailer' in str(i) and 'season' in str(i['name']['value']).lower():
        print(i['name']['value'],' - ' ,time_format(i['runtime']['value']))
Author
Author
Found a small issue when a trailer title contains "Final" but is for the final season, e.g. Better Call Saul's "Better Call Saul: A Look At The Final Season": it gets picked up as the theatrical trailer. To stop this, I added a second check that the title doesn't contain "season".
checkout Gujal00/Kodi-Official@c556e4e
I've provided working API lookups for all the pages currently scraped:
VideoPlayback => https://www.imdb.com/video/vi1020905497/?ref_=ttvg_vi_1
CalendarPage => https://www.imdb.com/calendar/?region=US&type=MOVIE&ref_=rlm
movies_near_you => https://www.imdb.com/showtimes/
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
However, other than "types", the filter constraints take IMDb IDs. nameConstraints look like
"nameConstraints":{"allNameIds":["nm0004395","nm3138882"]}, and titleConstraints likewise,
i.e. anyTitleIds being e.g. tt11280740 (IMDb IDs). So they are not actually a "video clip name" or "video clip title".