Created
November 1, 2011 02:06
-
-
Save vofik/1329651 to your computer and use it in GitHub Desktop.
Plex scanner/stacker improvements
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# | |
# Copyright (c) 2010 Plex Development Team. All rights reserved. | |
# | |
import re, os, os.path | |
import Media, VideoFiles, Stack, Utils | |
SeriesScanner = __import__('Plex Series Scanner') | |
# Matches "<title> (<year>)" or "<title> [<year>]": group 1 is the title and
# group 2 a four-digit year starting with 1 or 2.  Raw strings keep the
# backslashes literal without relying on Python passing unknown escapes through.
nice_match = r'(.+) [\(\[]([1-2][0-9]{3})[\)\]]'

# Directory names shaped like "<show>[ (<number>)] - <S>x<E>[-<S>x<E>] - <title>".
# Scan() removes matching entries from its subdirs list.
# NOTE(review): "([0-9])+x" captures only the last season digit; in this file
# only match/no-match is used, so the pattern text is left unchanged.
standalone_tv_regexs = [r'(.*?)( \(([0-9]+)\))? - ([0-9])+x([0-9]+)(-[0-9]+[Xx]([0-9]+))? - (.*)']
# Scans through files, and add to the media list. | |
def Scan(path, files, mediaList, subdirs, language=None, **kwargs):
    """Scan one directory for movies and add them to mediaList.

    Three layouts are handled: a DVD rip (a video_ts.ifo/.bup file is present),
    a Blu-ray rip (path ends in BDMV/STREAM) and plain video files.  For plain
    files the results are stacked, renamed from the folder name when exactly
    one item remains, and multi-disc subfolders are merged into one movie.

    Args:
        path: Directory being scanned; split with Utils.SplitPath.
            # presumably relative to the library root -- confirm with caller
        files: Paths of the files found in that directory.
        mediaList: List that receives the Media.Movie objects (modified in place).
        subdirs: Subdirectory paths; entries that must not be scanned are
            removed in place.
        language: Unused here.
        **kwargs: Unused here.
    """
    # Scan for video files.
    VideoFiles.Scan(path, files, mediaList, subdirs)

    # Check for DVD rips.
    paths = Utils.SplitPath(path)
    video_ts = ContainsFile(files, 'video_ts.ifo')
    if video_ts is None:
        video_ts = ContainsFile(files, 'video_ts.bup')

    if len(paths) >= 1 and len(paths[0]) > 0 and video_ts is not None:
        print("Found a DVD")
        name = year = None

        # Now find the name.
        if paths[-1].lower() == 'video_ts' and len(paths) >= 2:
            # Easiest case.
            (name, year) = VideoFiles.CleanName(paths[-2])
        else:
            # Work up until we find a viable candidate.  reversed() leaves
            # `paths` untouched, so paths[0] below really is the topmost
            # component (an in-place reverse() would flip it).
            for p in reversed(paths):
                if re.match(nice_match, p):
                    (name, year) = VideoFiles.CleanName(p)
                    break

            if name is None:
                # Use the topmost path.
                (name, year) = VideoFiles.CleanName(paths[0])

        movie = Media.Movie(name, year)

        # Add the video_ts file first.
        movie.parts.append(video_ts)

        biggestFile = None
        biggestSize = 0
        for i in files:
            if os.path.splitext(i)[1].lower() == '.vob':
                size = os.path.getsize(i)
                if size > biggestSize:
                    biggestSize = size
                    biggestFile = i

        # Add the biggest part so that we can get thumbnail/art/analysis from it.
        if biggestFile is not None:
            movie.parts.append(biggestFile)

        if len(movie.parts) > 0:
            movie.guid = checkNfoFile(movie.parts[0], 1)
            mediaList.append(movie)

    # Check for Bluray rips.
    elif len(paths) >= 3 and paths[-1].lower() == 'stream' and paths[-2].lower() == 'bdmv':
        (name, year) = VideoFiles.CleanName(paths[-3])
        movie = Media.Movie(name, year)
        for i in files:
            movie.parts.append(i)
        mediaList.append(movie)

    else:
        # Make movies!
        for i in files:
            filename = os.path.basename(i)
            (name, year) = VideoFiles.CleanName(os.path.splitext(filename)[0])

            # If it matches a TV show, don't scan it as a movie.
            tv = False
            for rx in SeriesScanner.episode_regexps[0:-1]:
                if re.match(rx, name):
                    # Single-argument print() emits the same text on Python 2 and 3.
                    print("The file %s looked like a TV show so we're skipping it ( %s )" % (filename, rx))
                    tv = True

            if not tv:
                # OK, it's a movie
                movie = Media.Movie(name, year)
                movie.source = VideoFiles.RetrieveSource(filename)
                movie.parts.append(i)
                mediaList.append(movie)

        # Stack the results.
        Stack.Scan(path, files, mediaList, subdirs)

        # Clean the folder name and try a match on the folder.
        if len(path) > 0:
            # NOTE(review): the first replace() is a no-op as written; the
            # original probably replaced a special whitespace character that
            # was lost in transcription -- confirm against the upstream file.
            folderName = os.path.basename(path).replace(' ', ' ').replace(' ', '.')
            (cleanName, year) = VideoFiles.CleanName(folderName)
            if len(mediaList) == 1 and re.match(nice_match, cleanName):
                res = re.findall(nice_match, cleanName)
                mediaList[0].name = res[0][0]
                mediaList[0].year = res[0][1]
            elif len(mediaList) == 1 and (len(cleanName) > 1 or year is not None):
                mediaList[0].name = cleanName
                mediaList[0].year = year

        # Check for a folder with multiple 'CD' subfolders and massage.
        # Keys are "<outer>-<inner>" disc numbers; "0-<n>" marks a disc folder
        # that has no nested disc folders of its own.
        discPattern = r'(?:cd|dvd|part|pt|disk|disc)[ \\.-]*([0-9]+)'
        foundCDsubdirs = {}
        for s in subdirs:
            m = re.search(discPattern, os.path.basename(s).lower())
            if not m:
                continue
            foundSubSubDirs = False
            for subsubdir in os.listdir(s):
                if os.path.isdir(os.path.join(s, subsubdir)):
                    subm = re.search(discPattern, subsubdir.lower())
                    if subm:
                        foundSubSubDirs = True
                        foundCDsubdirs[m.group(1) + '-' + subm.group(1)] = os.path.join(s, subsubdir)
            if not foundSubSubDirs:
                foundCDsubdirs['0-' + m.group(1)] = s

        # More than one cd subdir, let's stack and whack subdirs.
        if len(foundCDsubdirs) > 1:
            name, year = VideoFiles.CleanName(os.path.basename(path))
            movie = Media.Movie(name, year)

            # Order discs numerically; a plain string sort puts "0-10" before "0-2".
            keys = sorted(foundCDsubdirs, key=lambda k: [int(n) for n in k.split('-')])
            # Uses the first disc in sorted order so the lookup is deterministic.
            movie.guid = checkNfoFile(os.path.dirname(foundCDsubdirs[keys[0]]), 1)

            for key in keys:
                d = foundCDsubdirs[key]
                subFiles = [os.path.join(d, f) for f in os.listdir(d)]
                VideoFiles.Scan(d, subFiles, mediaList, [])
                # Decide per disc: a top-level disc folder is removed itself, a
                # nested one removes its parent (once).  A single flag left over
                # from the discovery loop gets this wrong for mixed layouts.
                if d in subdirs:
                    subdirs.remove(d)
                else:
                    parent = os.path.dirname(d)
                    if parent in subdirs:
                        subdirs.remove(parent)
                movie.parts += subFiles

            if len(movie.parts) > 0:
                mediaList.append(movie)

        # See if we can find a GUID.
        for mediaItem in mediaList:
            # Items without parts have no file to look next to.
            if mediaItem.guid is None and mediaItem.parts:
                mediaItem.guid = checkNfoFile(mediaItem.parts[0], len(mediaList))

        if len(mediaList) == 1:
            if mediaList[0].source is None:
                mediaList[0].source = VideoFiles.RetrieveSource(path)

    # If the subdirectories indicate that we're inside a DVD, then whack things other than audio and video.
    whack = []
    if 'video_ts' in [Utils.SplitPath(s)[-1].lower() for s in subdirs]:
        for subdir in subdirs:
            if os.path.basename(subdir).lower() not in ['video_ts', 'audio_ts']:
                whack.append(subdir)

    # Finally, if any of the subdirectories match a TV show, don't enter!
    for subdir in subdirs:
        for rx in standalone_tv_regexs:
            if re.findall(rx, subdir):
                whack.append(subdir)

    for w in whack:
        # A directory can be listed twice (both rules above); remove it once.
        if w in subdirs:
            subdirs.remove(w)
def ContainsFile(files, file):
    """Return the first path in files whose base name equals file, ignoring case.

    Args:
        files: Iterable of file paths.
        file: Base name to look for.

    Returns:
        The matching path exactly as it appears in files, or None if no entry
        matches.
    """
    wanted = file.lower()  # hoisted: does not change between iterations
    for candidate in files:
        if os.path.basename(candidate).lower() == wanted:
            return candidate
    return None
def checkNfoFile(file, fileCount):
    """Look for an IMDb-style id ("tt" followed by digits) in an NFO file.

    Args:
        file: Path of a media file; the NFO is searched relative to it.
        fileCount: Number of media items found alongside it.  With exactly 1,
            the first "*.nfo" returned by os.listdir for file's directory is
            used; otherwise only the sidecar "<file without extension>.nfo".

    Returns:
        The first "tt<digits>" token found in the NFO text, or None when there
        is no NFO, no token, or the lookup fails.  Failures are reported with
        a printed warning rather than raised.
    """
    try:
        path = None

        # Depending on how many media files we have, check differently.
        if fileCount == 1:
            # Look for any NFO file.
            folder = os.path.dirname(file)
            for f in os.listdir(folder):
                if f[-4:].lower() == '.nfo':
                    path = os.path.join(folder, f)
                    break
        else:
            # Look for a sidecar NFO file.
            path = os.path.splitext(file)[0] + '.nfo'

        if path is not None and os.path.exists(path):
            # try/finally closes the handle even if read() fails.
            nfo = open(path)
            try:
                nfoText = nfo.read()
            finally:
                nfo.close()
            m = re.search('(tt[0-9]+)', nfoText)
            if m:
                return m.group(1)
    except Exception:
        # Best effort: a broken NFO must not abort the scan, but
        # KeyboardInterrupt/SystemExit are no longer swallowed.
        print("Warning, couldn't read NFO file.")

    return None
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import Media, VideoFiles | |
import os.path, difflib | |
import re | |
# These are the characters we are looking for being different across stackable filenames.
_STACK_DIFFS = r'[\da-n]'
# Trailing part marker, optionally followed by "." and a number.
_STACK_SUFFIXES = r'(?:cd|dvd|part|pt|disk|disc|scene)\.?(?:\d+)?$'
# "scene", any one character, then digits -- at the start or the end of the name.
_SCENE_PREFIXES = r'(?:^scene.\d+|scene.\d+$)'


def compareFilenames(elem):
    """Sort key for media items: the lower-cased path of the first part."""
    return elem.parts[0].lower()


def _is_stacked(item):
    """Return True if an earlier pass already flagged item as stacked."""
    return getattr(item, 'stacked', False) == True


def _stack_monotonic(dir, mediaList, stack_dict):
    """Stack neighbours whose names count up by one (1, 2, 3... or a, b, c...)."""
    monotonicSeries = False
    lastPair = len(mediaList) - 2  # index of the final (m1, m2) pair
    for count, (m1, m2) in enumerate(zip(mediaList[:-1], mediaList[1:])):
        # If it didn't start as a monotonic series, it's not going to become one.
        if count > 0 and not monotonicSeries:
            break

        # If items were already stacked by other method, skip this attempt.
        if _is_stacked(m1):
            continue

        f1 = os.path.basename(os.path.splitext(m1.parts[0])[0]).strip().lower()
        f2 = os.path.basename(os.path.splitext(m2.parts[0])[0]).strip().lower()

        # Both names start, or both end, with a number and the second number
        # is exactly one larger than the first.
        for pattern in (r'(^\d+)', r'(\d+)$'):
            a = re.search(pattern, f1)
            b = re.search(pattern, f2)
            if a and b and int(a.group(0)) == int(b.group(0)) - 1:
                monotonicSeries = True
                break

        # Same idea with letters: the first name must carry the letter expected
        # at this position when counting from "a", the second the next letter.
        # NOTE(review): two unrelated files starting with "a" and "b" pass this too.
        if not monotonicSeries:
            expected = ord('a') + count
            for pattern in (r'(^[a-y])', r'([a-y])$'):
                a = re.search(pattern, f1)
                b = re.search(pattern, f2)
                if (a and b and ord(a.group(0)) == expected
                        and ord(a.group(0)) == ord(b.group(0)) - 1):
                    monotonicSeries = True
                    break

        # NOTE(review): the flag is never reset, so once one pair qualifies
        # every later pair is stacked as well.
        if monotonicSeries:
            m1.name = dir
            m1.stacked = True
            stack_dict.setdefault('_monotonic', [m1]).append(m2)
            # Only flag the second item on the last pair; flagging it earlier
            # would make the next iteration skip it as m1.
            if count == lastPair:
                m2.stacked = True


def _stack_scenes(mediaList, stack_dict):
    """Group files named like "scene 1 ..." into the '_scene' stack."""
    for mediaItem in mediaList:
        # If items were already stacked by other method, skip this attempt.
        if _is_stacked(mediaItem):
            continue

        f1 = os.path.basename(os.path.splitext(mediaItem.parts[0])[0]).lower()
        if re.match(_SCENE_PREFIXES, f1):
            (name, _year) = VideoFiles.CleanName(re.sub(_SCENE_PREFIXES, '', f1))
            mediaItem.name = name
            mediaItem.stacked = True
            stack_dict.setdefault('_scene', []).append(mediaItem)


def _stack_prefixed(mediaList, stack_dict):
    """Stack neighbours that differ in one short part marker (cd1/cd2, 1 of 2...)."""
    lastPair = len(mediaList) - 2
    # Pairs come from enumerate/zip, so skipping a pair can no longer stall
    # the index the way a manual counter placed after `continue` does.
    for count, (m1, m2) in enumerate(zip(mediaList[:-1], mediaList[1:])):
        # If items were already stacked by other method, skip this attempt.
        if _is_stacked(m1):
            continue

        f1 = os.path.basename(m1.parts[0])
        f2 = os.path.basename(m2.parts[0])
        opcodes = difflib.SequenceMatcher(None, f1, f2).get_opcodes()

        # Exactly three opcodes with a 'replace' in the middle: one changed
        # region with identical text on both sides.
        if len(opcodes) != 3:
            continue
        (tag, i1, i2, j1, j2) = opcodes[1]
        if tag != 'replace':
            continue
        # The changed region is at most two characters on each side...
        if (i2 - i1 > 2) or (j2 - j1 > 2):
            continue
        # ...and contains a digit or a letter a-n.
        if not re.search(_STACK_DIFFS, f1[i1:i2].lower()):
            continue

        root = f1[:i1].strip(' _-')
        # "x of y" style stack?
        xOfy = f1[i1 + 1:].lower().strip().startswith('of')

        # Special case: multiple volumes of a series are not a stacked movie
        # [e.g. Kill Bill Vol 1 / 2].
        lowered = root.lower().strip()
        if lowered.endswith('vol') or lowered.endswith('volume'):
            continue

        # Strip any suffixes like CD, DVD.
        foundSuffix = False
        suffixMatch = re.search(_STACK_SUFFIXES, lowered)
        if suffixMatch:
            root = root[0:-len(suffixMatch.group(0))].strip(' _-')
            foundSuffix = True

        if not (foundSuffix or xOfy):
            continue

        # Replace the name, which probably had the suffix.
        (name, _year) = VideoFiles.CleanName(root)
        m1.name = name
        m1.stacked = True
        stack_dict.setdefault(root, [m1]).append(m2)
        if count == lastPair:
            m2.stacked = True


def _merge_stacks(stack_dict):
    """Merge stacks whose names differ only in their final character."""
    # Sorted so that similar names are neighbours regardless of dict ordering.
    stacks = sorted(stack_dict)
    for s1, s2 in zip(stacks[:-1], stacks[1:]):
        opcodes = difflib.SequenceMatcher(None, s1, s2).get_opcodes()

        # Two opcodes ending in a one-character 'replace': same prefix,
        # different last character.
        if len(opcodes) != 2:
            continue
        (tag, i1, i2, j1, j2) = opcodes[1]
        if tag != 'replace' or (i2 - i1) != 1 or (j2 - j1) != 1:
            continue
        # That one character is a digit or a-n.
        if not re.search(_STACK_DIFFS, s1[i1:i2].lower()):
            continue

        root = s1.lower().strip()
        suffixMatch = re.search(_STACK_SUFFIXES, root)
        if not suffixMatch:
            continue
        root = root[0:-len(suffixMatch.group(0))].strip(' -')
        (name, _year) = VideoFiles.CleanName(root)

        # Merge the two stacks into the new root.  pop() tolerates a stack
        # that an earlier pair already merged away.
        for oldstack in (s1, s2):
            members = stack_dict.pop(oldstack, None)
            if members is None:
                continue
            for media in members:
                media.name = name
            stack_dict.setdefault(root, []).extend(members)


def _collapse_stacks(mediaList, stack_dict):
    """Fold each stack into its first item and drop the others from mediaList."""
    for members in stack_dict.values():
        primary = members[0]
        for media in members[1:]:
            primary.parts.append(media.parts[0])
            # Guarded: an item that was already removed must not raise.
            if media in mediaList:
                mediaList.remove(media)


def Scan(dir, files, mediaList, subdirs):
    """Combine media items that are parts of one movie into a single item.

    mediaList is sorted by file name and then processed in passes: monotonic
    numbering/lettering, "scene N" names, part markers found by diffing
    neighbouring names, and merging of near-identical stack names.  For every
    resulting stack the first part of each later item is appended to the first
    item's parts and the later items are removed from mediaList.

    Args:
        dir: Assigned as the name of items stacked by the monotonic pass.
        files: Unused here.
        mediaList: Items exposing `parts` and `name`; modified in place.
        subdirs: Unused here.
    """
    # Go through the files and see if any of them need to be stacked.
    stack_dict = {}

    # Sort the mediaList by filename, so we can do our compares properly.
    mediaList[:] = sorted(mediaList, key=compareFilenames)

    _stack_monotonic(dir, mediaList, stack_dict)
    _stack_scenes(mediaList, stack_dict)
    _stack_prefixed(mediaList, stack_dict)
    _merge_stacks(stack_dict)
    _collapse_stacks(mediaList, stack_dict)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment