Skip to content

Instantly share code, notes, and snippets.

@henryjfry
Last active August 11, 2025 09:23
Show Gist options
  • Select an option

  • Save henryjfry/8da2b90aa4a4ef09110625a56b2367c7 to your computer and use it in GitHub Desktop.

Select an option

Save henryjfry/8da2b90aa4a4ef09110625a56b2367c7 to your computer and use it in GitHub Desktop.
IMDb trailer lookup
import json
import requests
import time
def get_imdb_videos(imdb_id):
    """Fetch every trailer video IMDb lists for a title via its GraphQL API.

    The title's ``videoStrip`` connection is requested 50 items per page,
    restricted to type ``TRAILER``, mature content included, sorted by date
    descending. Further pages are requested while the API reports
    ``hasNextPage`` together with an ``endCursor``.

    Args:
        imdb_id: IMDb title id, e.g. ``'tt4532368'``.

    Returns:
        A list of video node dicts exactly as returned by the API (``id``,
        ``contentType``, ``name``, ``runtime``, ``thumbnail``,
        ``primaryTitle``), each with three keys injected: ``plot``,
        ``total`` and ``item_title``. The list is empty when the response
        carries no title or no videos.

    Raises:
        requests.HTTPError: if a response has an HTTP error status.
        requests.RequestException: on connection failure or timeout.
    """
    import re
    import requests

    API_URL = "https://graphql.prod.api.imdb.a2z.com/"
    HEADERS = {
        'Referer': 'https://www.imdb.com/',
        'Origin': 'https://www.imdb.com',
        'User-Agent': 'Mozilla/5.0'
    }
    REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever
    PAGE_DELAY = 0.3      # pause between page requests

    def gqlmin(q):
        # These queries contain no string literals or comments, so every
        # whitespace run can be collapsed regardless of indentation style.
        return re.sub(r'\s+', ' ', q).strip()

    def fetch_title(operation_name, query, query_variables):
        """POST one operation and return the response's ``data.title`` dict."""
        payload = {
            'operationName': operation_name,
            'query': gqlmin(query),
            'variables': query_variables
        }
        r = requests.post(API_URL, headers=HEADERS, json=payload,
                          timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
        # Any level may be JSON null (e.g. unknown id), so ``.get(k, {})``
        # is not enough: it would hand back None, not the default.
        return (r.json().get('data') or {}).get('title') or {}

    query_subpage = '''
    query TitleVideoGallerySubPage(
        $const: ID!,
        $first: Int!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            titleText { text }
            plot { plotText { plainText } }
            videoStrip(first: $first, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    query_pagination = '''
    query TitleVideoGalleryPagination(
        $const: ID!,
        $first: Int!,
        $after: ID!,
        $filter: VideosQueryFilter,
        $sort: VideoSort
    ) {
        title(id: $const) {
            videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
                ...VideoGalleryItems
            }
        }
    }
    '''
    fragment = '''
    fragment VideoGalleryItems on VideoConnection {
        pageInfo {
            endCursor
            hasNextPage
        }
        total
        edges {
            node {
                id
                contentType { id }
                name { value }
                runtime { value }
                thumbnail { url }
                primaryTitle {
                    series {
                        displayableEpisodeNumber {
                            displayableSeason {
                                season
                            }
                        }
                        series {
                            titleText { text }
                        }
                    }
                }
            }
        }
    }
    '''
    variables = {
        "const": imdb_id,
        "first": 50,
        "filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
        "sort": {"by": "DATE", "order": "DESC"}
    }

    # First page also carries the title text and plot.
    title_data = fetch_title("TitleVideoGallerySubPage",
                             query_subpage + fragment, variables)
    plot_text = ((title_data.get('plot') or {}).get('plotText') or {}).get('plainText') or ""
    item_title = (title_data.get('titleText') or {}).get('text') or ""
    video_data = title_data.get('videoStrip') or {}
    total_videos = video_data.get('total')

    videos = []
    while True:
        for edge in video_data.get('edges') or []:
            node = (edge or {}).get('node')
            if node:  # an edge without a node has nothing usable downstream
                videos.append(node)
        page_info = video_data.get('pageInfo') or {}
        cursor = page_info.get('endCursor')
        if not (page_info.get('hasNextPage') and cursor):
            break
        time.sleep(PAGE_DELAY)
        page_variables = dict(variables, after=cursor)
        video_data = fetch_title("TitleVideoGalleryPagination",
                                 query_pagination + fragment,
                                 page_variables).get('videoStrip') or {}

    # Match old output: inject plot, total, and item_title
    for video in videos:
        video["plot"] = plot_text
        video["total"] = total_videos
        video["item_title"] = item_title
    return videos
def time_format(seconds: int) -> str:
    """Render a duration in seconds as a compact human-readable string.

    Only the units from the largest non-zero one downwards are shown:
    ``'01D 01H 01m 01s'``, ``'01H 00m 00s'``, ``'01m 05s'`` or ``'05s'``.

    Args:
        seconds: duration in seconds; anything ``int()`` accepts. ``None``
            is allowed.

    Returns:
        The formatted duration, or ``'-'`` when ``seconds`` is ``None``,
        not convertible to an integer, zero, or negative.
    """
    try:
        total = int(seconds)
    except (TypeError, ValueError, OverflowError):
        return '-'
    # Floor division on a negative value would yield nonsense such as
    # "23H 59m 55s", so treat it like "no duration".
    if total <= 0:
        return '-'

    total_minutes, s = divmod(total, 60)
    total_hours, m = divmod(total_minutes, 60)
    d, h = divmod(total_hours, 24)

    if d:
        return '{:02d}D {:02d}H {:02d}m {:02d}s'.format(d, h, m, s)
    if h:
        return '{:02d}H {:02d}m {:02d}s'.format(h, m, s)
    if m:
        return '{:02d}m {:02d}s'.format(m, s)
    return '{:02d}s'.format(s)
import re
def extract_season_number(title):
    """Extract a season number from a trailer title.

    Looks for "Season" or "Series" (case-insensitive) followed by optional
    whitespace and digits, e.g. ``"Season 2 Official Trailer"`` -> ``2``.
    When several such pairs occur, the last one wins.

    Args:
        title: the trailer title; non-string values yield ``None``.

    Returns:
        The season number as ``int``, or ``None`` when the title names no
        numbered season.
    """
    if not isinstance(title, str):
        return None
    # Requiring the digits inside the pattern means a trailing, number-less
    # "Season"/"Series" cannot hide an earlier numbered one.
    numbers = re.findall(r"(?:Season|Series)\s*(\d+)", title, re.IGNORECASE)
    return int(numbers[-1]) if numbers else None
def _dig(obj, *keys):
    """Follow ``keys`` through nested dicts; None once a level is missing or not a dict."""
    for key in keys:
        if not isinstance(obj, dict):
            return None
        obj = obj.get(key)
    return obj


def find_best_trailer(trailer_list, season_number=None):
    """Pick the most suitable trailer from a list of IMDb video nodes.

    Only items whose ``contentType.id`` is the trailer content type are
    considered. They are ordered longest runtime first, so every
    "first match" below favours the longest trailer.

    Selection:
      1. If ``season_number`` is given and some trailer belongs to that
         season, or failing that to the closest earlier season, the first
         trailer of that season is returned.
      2. Otherwise a "headline" trailer is looked for, in this order:
         a theatrical one (title contains "theatrical", "full" or "final"
         and does not mention "season"); an official one (title contains
         "official" and the trailer has no season), preferring non-teasers;
         one whose title equals the series title.
      3. With neither, the longest trailer is returned.

    A trailer's season comes from its ``primaryTitle.series`` metadata and,
    when that is absent, from ``extract_season_number`` on its title.

    Args:
        trailer_list: video node dicts as produced by ``get_imdb_videos``.
        season_number: season to match, or ``None``/0 for no preference.

    Returns:
        A dict with keys ``id``, ``vid_url``, ``season``, ``title``,
        ``theatrical``, ``official``, ``thumbnail``, ``runtime`` and
        ``time``; or ``None`` when the list holds no trailer-type item.
    """
    if not trailer_list:
        return None

    trailer_type = 'amzn1.imdb.video.contenttype.trailer'
    theatrical_words = ('theatrical', 'full', 'final')

    # A null runtime sorts as 0 instead of raising.
    ordered = sorted(trailer_list,
                     key=lambda t: _dig(t, 'runtime', 'value') or 0,
                     reverse=True)

    candidates = []
    seasons = []
    series_title = None
    for trailer in ordered:
        if _dig(trailer, 'contentType', 'id') != trailer_type:
            continue

        title = _dig(trailer, 'name', 'value')
        title_lower = str(title).lower()
        series = _dig(trailer, 'primaryTitle', 'series')

        # Evaluated afresh for every trailer so that one without series
        # metadata cannot inherit the previous trailer's season.
        try:
            season = int(_dig(series, 'displayableEpisodeNumber',
                              'displayableSeason', 'season'))
        except (TypeError, ValueError):
            season = None

        if season:
            series_title = _dig(series, 'series', 'titleText', 'text') or series_title
        elif isinstance(title, str):
            season = extract_season_number(title) or season

        if season and season not in seasons:
            seasons.append(season)

        runtime = _dig(trailer, 'runtime', 'value')
        candidates.append({
            'id': trailer['id'],
            'vid_url': 'https://www.imdb.com/video/%s/?ref_=ttvg_vi_1' % (str(trailer['id'])),
            'season': season,
            'title': title,
            # "... A Look At The Final Season" is a season featurette, not a
            # theatrical cut, hence the extra 'season' exclusion.
            'theatrical': (any(word in title_lower for word in theatrical_words)
                           and 'season' not in title_lower),
            # Season-specific "official" trailers do not count as the
            # title's headline official trailer.
            'official': 'official' in title_lower and not season,
            'thumbnail': _dig(trailer, 'thumbnail', 'url'),
            'runtime': runtime,
            'time': time_format(runtime),
        })

    if not candidates:
        return None

    def first(predicate):
        return next((c for c in candidates if predicate(c)), None)

    # Requested season if present, else the closest earlier one, else none.
    target_season = None
    if season_number:
        target_season = max((s for s in seasons if s <= season_number),
                            default=None)

    if target_season is not None:
        season_trailer = first(lambda c: c['season'] == target_season)
    else:
        season_trailer = candidates[0]

    # Flags are derived from the finished candidate list so the outcome
    # does not depend on the order the trailers were visited in.
    if any(c['theatrical'] for c in candidates):
        headline = first(lambda c: c['theatrical'])
    elif any(c['official'] for c in candidates):
        headline = (first(lambda c: c['official']
                          and 'teaser' not in str(c['title']).lower())
                    or first(lambda c: c['official']))
    elif series_title:
        headline = first(lambda c: c['title'] == series_title)
    else:
        headline = None

    # A season match always wins; the headline trailer only replaces the
    # "longest trailer" fallback.
    if headline is not None and (target_season is None or season_trailer is None):
        season_trailer = headline
    return season_trailer
def extract_imdb_mp4_url(video_id):
    """Scrape an IMDb video page for a playable stream URL.

    Downloads ``https://www.imdb.com/video/<video_id>`` and reads the
    ``"playbackURLs"`` JSON array embedded in the page.

    Args:
        video_id: IMDb video id, e.g. ``'vi1020905497'``.

    Returns:
        ``(url, entry)`` where ``entry`` is the chosen playback dict. The
        first entry whose ``videoMimeType`` is ``'MP4'`` is preferred;
        otherwise the first entry with a non-empty ``url``. ``(None, None)``
        when the array holds no usable entry.

    Raises:
        Exception: if the page does not return HTTP 200 or contains no
            ``playbackURLs`` array.
        ValueError: if the embedded array is not valid JSON.
    """
    page_url = f"https://www.imdb.com/video/{video_id}?ref_=ttvg_vi_26"
    headers = {'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_11_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/50.0.2661.102 Safari/537.36'}
    response = requests.get(page_url, headers=headers, timeout=30)
    if response.status_code != 200:
        raise Exception(f"Failed to fetch page: {response.status_code}")
    html = response.text

    marker = '"playbackURLs":['
    marker_pos = html.find(marker)
    if marker_pos == -1:
        raise Exception("No playbackURLs found in page")

    # SECURITY: this used to eval() text taken from a remote page, which
    # executes whatever the server sends and also fails on JSON's
    # true/false/null. Decode it as JSON instead; raw_decode stops at the
    # array's matching bracket, so no fragile split on '}]' is needed.
    array_start = marker_pos + len(marker) - 1
    playback_urls, _ = json.JSONDecoder().raw_decode(html, array_start)

    fallback = None
    for entry in playback_urls:
        if entry.get('videoMimeType') == 'MP4':
            return entry['url'], entry
        if fallback is None and entry.get('url'):
            fallback = entry
        #print(entry['videoDefinition'])
        #print(entry['videoMimeType'])

    if fallback is None:
        return None, None
    return fallback['url'], fallback
# Demo run: fetch the trailers of one title, pick the best one and resolve
# its stream URL.
all_videos = get_imdb_videos(imdb_id='tt4532368')
#print(all_videos)
best_trailer = find_best_trailer(all_videos, season_number=None)
if best_trailer:
    print(best_trailer)
    print(best_trailer['title'])
    video_url, video = extract_imdb_mp4_url(best_trailer['id'])
    print("MP4 URL:", video_url)
    print(video)
# NOTE(review): this copy lost its indentation; exit() is assumed to sit at
# top level (as in the other demo snippet), which makes everything below it
# unreachable debugging output. Confirm against the original gist.
exit()
#print(json.dumps(all_videos[:3], indent=2)) # Show first 3 videos
print(f"Total videos fetched: {len(all_videos)}")
print(all_videos)
for i in all_videos:
    if 'contenttype.trailer' in str(i) and 'season' in str(i['name']['value']).lower():
        print(i['name']['value'],' - ' ,time_format(i['runtime']['value']))
@Gujal00
Copy link

Gujal00 commented Aug 10, 2025

Yes, I already tried `contenttype` as a filter key, but the API responded with an "incorrect parameter" error. So yes, that is exactly what I am targeting, and I have yet to figure it out.

@henryjfry
Copy link
Author

Think this should be what you need:

"filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},

import json
import requests
import time

def get_imdb_videos(imdb_id):
	"""Fetch every trailer video IMDb lists for a title via its GraphQL API.

	The title's ``videoStrip`` connection is requested 50 items per page,
	restricted to type ``TRAILER``, mature content included, sorted by date
	descending. Further pages are requested while the API reports
	``hasNextPage`` together with an ``endCursor``.

	Args:
		imdb_id: IMDb title id, e.g. ``'tt11280740'``.

	Returns:
		A list of video node dicts exactly as returned by the API (``id``,
		``contentType``, ``name``, ``runtime``, ``thumbnail``,
		``primaryTitle``), each with three keys injected: ``plot``,
		``total`` and ``item_title``. The list is empty when the response
		carries no title or no videos.

	Raises:
		requests.HTTPError: if a response has an HTTP error status.
		requests.RequestException: on connection failure or timeout.
	"""
	import re
	import requests

	API_URL = "https://graphql.prod.api.imdb.a2z.com/"
	HEADERS = {
		'Referer': 'https://www.imdb.com/',
		'Origin': 'https://www.imdb.com',
		'User-Agent': 'Mozilla/5.0'
	}
	REQUEST_TIMEOUT = 30  # seconds; without it a stalled connection blocks forever
	PAGE_DELAY = 0.3      # pause between page requests

	def gqlmin(q):
		# Stripping runs of four spaces did nothing for these tab-indented
		# queries. They contain no string literals or comments, so every
		# whitespace run can safely be collapsed.
		return re.sub(r'\s+', ' ', q).strip()

	def fetch_title(operation_name, query, query_variables):
		"""POST one operation and return the response's ``data.title`` dict."""
		payload = {
			'operationName': operation_name,
			'query': gqlmin(query),
			'variables': query_variables
		}
		r = requests.post(API_URL, headers=HEADERS, json=payload,
						  timeout=REQUEST_TIMEOUT)
		r.raise_for_status()
		# Any level may be JSON null (e.g. unknown id), so ``.get(k, {})``
		# is not enough: it would hand back None, not the default.
		return (r.json().get('data') or {}).get('title') or {}

	query_subpage = '''
	query TitleVideoGallerySubPage(
		$const: ID!,
		$first: Int!,
		$filter: VideosQueryFilter,
		$sort: VideoSort
	) {
		title(id: $const) {
			titleText { text }
			plot { plotText { plainText } }
			videoStrip(first: $first, filter: $filter, sort: $sort) {
				...VideoGalleryItems
			}
		}
	}
	'''
	query_pagination = '''
	query TitleVideoGalleryPagination(
		$const: ID!,
		$first: Int!,
		$after: ID!,
		$filter: VideosQueryFilter,
		$sort: VideoSort
	) {
		title(id: $const) {
			videoStrip(first: $first, after: $after, filter: $filter, sort: $sort) {
				...VideoGalleryItems
			}
		}
	}
	'''
	fragment = '''
	fragment VideoGalleryItems on VideoConnection {
		pageInfo {
			endCursor
			hasNextPage
		}
		total
		edges {
			node {
				id
				contentType { id }
				name { value }
				runtime { value }
				thumbnail { url }
				primaryTitle {
					series {
						displayableEpisodeNumber {
							displayableSeason {
								season
							}
						}
						series {
							titleText { text }
						}
					}
				}
			}
		}
	}
	'''

	variables = {
		"const": imdb_id,
		"first": 50,
		"filter": {"maturityLevel": "INCLUDE_MATURE","nameConstraints":{},"titleConstraints":{},"types":["TRAILER"]},
		"sort": {"by": "DATE", "order": "DESC"}
	}

	# First page also carries the title text and plot.
	title_data = fetch_title("TitleVideoGallerySubPage",
							 query_subpage + fragment, variables)
	plot_text = ((title_data.get('plot') or {}).get('plotText') or {}).get('plainText') or ""
	item_title = (title_data.get('titleText') or {}).get('text') or ""
	video_data = title_data.get('videoStrip') or {}
	total_videos = video_data.get('total')

	videos = []
	while True:
		for edge in video_data.get('edges') or []:
			node = (edge or {}).get('node')
			if node:  # an edge without a node has nothing usable downstream
				videos.append(node)
		page_info = video_data.get('pageInfo') or {}
		cursor = page_info.get('endCursor')
		if not (page_info.get('hasNextPage') and cursor):
			break
		time.sleep(PAGE_DELAY)
		page_variables = dict(variables, after=cursor)
		video_data = fetch_title("TitleVideoGalleryPagination",
								 query_pagination + fragment,
								 page_variables).get('videoStrip') or {}

	# Match old output: inject plot, total, and item_title
	for video in videos:
		video["plot"] = plot_text
		video["total"] = total_videos
		video["item_title"] = item_title

	return videos




# Demo run: fetch the trailer list for one title, dump it, then stop the
# interpreter so nothing after this point is executed.
all_videos = get_imdb_videos(imdb_id='tt11280740')
print(all_videos)
exit()

@henryjfry
Copy link
Author

However, besides "types", the filter also takes nameConstraints, which look like this: "nameConstraints":{"allNameIds":["nm0004395","nm3138882"]}

And titleconstraints:

nameConstraints: {
                    allNameIds: r.nameIds?.sort( (e, t) => e.localeCompare(t))
                },
                titleConstraints: {
                    anyTitleIds: r.titleIds?.sort( (e, t) => e.localeCompare(t))

i.e. anyTitleIds holds IMDb title ids (e.g. tt11280740) and allNameIds holds IMDb name ids. So these constraints do not actually filter on the "video clip name" or "video clip title".

@henryjfry
Copy link
Author

Found a small issue when a trailer title contains "Final" but refers to a final season, e.g. Better Call Saul's "Better Call Saul: A Look At The Final Season". It gets picked up as the theatrical trailer. To stop this, I added a second check that the title doesn't contain "season".

@Gujal00, @matthuisman

checkout Gujal00/Kodi-Official@c556e4e

I've provided working API lookups for all the pages currently scraped:

VideoPlayback => https://www.imdb.com/video/vi1020905497/?ref_=ttvg_vi_1
CalendarPage => https://www.imdb.com/calendar/?region=US&type=MOVIE&ref_=rlm
movies_near_you => https://www.imdb.com/showtimes/

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment