Created
November 1, 2011 02:06
Revisions
-
vofik revised this gist
Nov 1, 2011. 1 changed file with 142 additions and 21 deletions. There are no files selected for viewing.
# Stack scanner -- later gist revision (diff hunk @@ -1,68 +1,189 @@).
# NOTE(review): this text was recovered from a whitespace-collapsed page, so the
# block nesting below was inferred from the statements and their comments;
# confirm it against the upstream file.
import Media, VideoFiles
import os.path, difflib
import re

# The characters we are looking for being different across stackable filenames.
_stackDiffs = r'[\da-n]'
# Trailing part markers ("cd1", "disc.2", "part", ...) removed from a stack root.
_stackSuffixes = r'(?:cd|dvd|part|pt|disk|disc|scene)\.?(?:\d+)?$'
# File stems that are grouped into the scene stack ("scene 12").
_scenePrefixes = r'(?:^scene.\d+|scene.\d+$)'


def compareFilenames(elem):
    """Return the sort key of a media item: its first part's path, lowercased."""
    return elem.parts[0].lower()


def _isStacked(item):
    """Tell whether an earlier pass already flagged *item* as stacked."""
    return getattr(item, 'stacked', False) == True


def _stem(item):
    """Return the base name of the item's first part, without its extension."""
    return os.path.basename(os.path.splitext(item.parts[0])[0])


def _isNextInSeries(f1, f2, position):
    """Tell whether *f2* directly follows *f1* in a numbered or lettered series.

    Both names must start, or both must end, with a number, and the second
    number must be exactly one larger. Failing that, both must start or end
    with a letter a-y, the first letter must be the one expected at *position*
    when counting from "a", and the second letter must be the next one.
    """
    for pattern in (r'(^\d+)', r'(\d+)$'):
        a = re.search(pattern, f1)
        b = re.search(pattern, f2)
        if a and b and int(a.group(0)) == int(b.group(0)) - 1:
            return True

    for pattern in (r'(^[a-y])', r'([a-y])$'):
        a = re.search(pattern, f1)
        b = re.search(pattern, f2)
        if (a and b
                and ord(a.group(0)) == ord('a') + position
                and ord(a.group(0)) == ord(b.group(0)) - 1):
            return True

    return False


def _stackMonotonicSeries(dir, mediaList, stack_dict):
    """Stack the leading run of monotonically numbered/lettered files."""
    root = '_monotonic'
    tail = None
    # Index-based pairing: the index can no longer drift away from the item
    # being examined when an iteration is skipped.
    for idx in range(len(mediaList) - 1):
        m1 = mediaList[idx]
        m2 = mediaList[idx + 1]

        # if items were already stacked by other method, skip this attempt
        if _isStacked(m1):
            continue

        f1 = _stem(m1).strip().lower()
        f2 = _stem(m2).strip().lower()

        # Every pair is validated; the series ends at the first pair that
        # does not continue it.
        if not _isNextInSeries(f1, f2, idx):
            break

        m1.name = dir
        m1.stacked = True
        stack_dict.setdefault(root, [m1]).append(m2)
        tail = m2

    # The last member is flagged only after the loop: flagging it earlier
    # would make the next iteration skip it as "already stacked".
    if tail is not None:
        tail.stacked = True


def _stackScenes(mediaList, stack_dict):
    """Group scene-based movie splits into the '_scene' stack."""
    root = '_scene'
    for mediaItem in mediaList:
        # if items were already stacked by other method, skip this attempt
        if _isStacked(mediaItem):
            continue

        f1 = _stem(mediaItem).lower()
        if re.match(_scenePrefixes, f1):
            (name, year) = VideoFiles.CleanName(re.sub(_scenePrefixes, '', f1))
            mediaItem.name = name
            stack_dict.setdefault(root, []).append(mediaItem)
            mediaItem.stacked = True


def _stackByPrefix(mediaList, stack_dict):
    """Stack neighbouring files whose names differ only in a part marker."""
    tails = []
    for idx in range(len(mediaList) - 1):
        m1 = mediaList[idx]
        m2 = mediaList[idx + 1]

        # Items claimed by an earlier pass stay where they are; this keeps an
        # item from ending up in two stacks.
        if _isStacked(m1) or _isStacked(m2):
            continue

        f1 = os.path.basename(m1.parts[0])
        f2 = os.path.basename(m2.parts[0])

        opcodes = difflib.SequenceMatcher(None, f1, f2).get_opcodes()
        if len(opcodes) != 3:  # We only want one transform between two equal runs
            continue
        (tag, i1, i2, j1, j2) = opcodes[1]
        if tag != 'replace' or (i2 - i1) > 2 or (j2 - j1) > 2:
            continue
        if not re.search(_stackDiffs, f1[i1:i2].lower()):
            continue

        root = f1[:i1].strip(' _-').strip()
        # check to see if this an x of y style stack, if so flag it
        xOfy = f1[i1 + 1:].lower().strip().startswith('of')

        # This is a special case for folders with multiple Volumes of a series
        # (not a stacked movie) [e.g, Kill Bill Vol 1 / 2]
        lowered = root.lower()
        if lowered.endswith('vol') or lowered.endswith('volume'):
            continue

        # Strip any suffixes like CD, DVD.
        foundSuffix = False
        suffixMatch = re.search(_stackSuffixes, lowered)
        if suffixMatch:
            root = root[0:-len(suffixMatch.group(0))].strip(' _-')
            foundSuffix = True

        if not (foundSuffix or xOfy):
            continue

        # Replace the name, which probably had the suffix.
        (name, year) = VideoFiles.CleanName(root)
        m1.name = name
        m1.stacked = True
        stack_dict.setdefault(root, [m1]).append(m2)
        tails.append(m2)

    # Flag the trailing members after the loop so that flagging them cannot
    # cut a stack short while it is still being extended.
    for item in tails:
        item.stacked = True


def _mergeSimilarStacks(stack_dict):
    """Merge stacks whose keys differ only in their final character."""
    # Sorted keys give a deterministic order in which similar keys are adjacent.
    stacks = sorted(stack_dict)
    movedTo = {}
    for idx in range(len(stacks) - 1):
        s1 = stacks[idx]
        s2 = stacks[idx + 1]

        opcodes = difflib.SequenceMatcher(None, s1, s2).get_opcodes()
        if len(opcodes) != 2:  # one equal run followed by one transform
            continue
        (tag, i1, i2, j1, j2) = opcodes[1]
        if tag != 'replace' or (i2 - i1) != 1 or (j2 - j1) != 1:
            continue
        # Test the character that actually differs, not the whole key.
        if not re.search(_stackDiffs, s1[i1:i2].lower()):
            continue

        # A key merged earlier keeps feeding the stack it was merged into.
        root = movedTo.get(s1)
        if root is None:
            root = s1.lower().strip()
            suffixMatch = re.search(_stackSuffixes, root)
            if suffixMatch:
                root = root[0:-len(suffixMatch.group(0))].strip(' -')
        (name, year) = VideoFiles.CleanName(root)

        # Take the members out first so the target list is never extended
        # while it is being iterated, and removed keys are never read again.
        members = []
        for oldstack in (s1, s2):
            members.extend(stack_dict.pop(oldstack, []))
            movedTo[oldstack] = root
        for media in members:
            media.name = name
        stack_dict.setdefault(root, []).extend(members)


def _collapseStacks(mediaList, stack_dict):
    """Fold every stack into its first item and drop the rest from mediaList."""
    for members in stack_dict.values():
        if not members:
            continue
        head = members[0]
        for media in members[1:]:
            # Skip anything already folded away instead of raising ValueError.
            if media is head or media not in mediaList:
                continue
            head.parts.append(media.parts[0])
            mediaList.remove(media)


def Scan(dir, files, mediaList, subdirs):
    """Combine multi-part movies in *mediaList* into single stacked items.

    Args:
        dir: Directory being scanned; used as the name of a monotonic series.
        files: Unused here; kept for the scanner calling convention.
        mediaList: Media items to stack. Sorted and modified in place: parts
            of stacked items are appended to the first item of their stack and
            the other items are removed.
        subdirs: Unused here; kept for the scanner calling convention.
    """
    # Go through the files and see if any of them need to be stacked.
    stack_dict = {}

    # Sort the mediaList by filename, so we can do our compares properly
    mediaList[:] = sorted(mediaList, key=compareFilenames)

    _stackMonotonicSeries(dir, mediaList, stack_dict)
    _stackScenes(mediaList, stack_dict)
    _stackByPrefix(mediaList, stack_dict)

    # combine stacks if possible
    _mergeSimilarStacks(stack_dict)

    # Now combine stacked parts
    _collapseStacks(mediaList, stack_dict)
vofik created this gist
Nov 1, 2011. There are no files selected for viewing.
# ---- File 1 of this gist revision: movie scanner (diff hunk @@ -0,0 +1,208 @@) ----
# NOTE(review): both files below were recovered from a whitespace-collapsed
# page, so block nesting was inferred from the statements and their comments;
# confirm it against the upstream files.
#
# Copyright (c) 2010 Plex Development Team. All rights reserved.
#
import re, os, os.path
import Media, VideoFiles, Stack, Utils
SeriesScanner = __import__('Plex Series Scanner')

# Folder names of the form "Title (1999)" or "Title [1999]".
nice_match = r'(.+) [\(\[]([1-2][0-9]{3})[\)\]]'
# Sub-directories matching any of these are treated as TV shows and not entered.
standalone_tv_regexs = [ r'(.*?)( \(([0-9]+)\))? - ([0-9])+x([0-9]+)(-[0-9]+[Xx]([0-9]+))? - (.*)' ]

# Scans through files, and add to the media list.
def Scan(path, files, mediaList, subdirs, language=None, **kwargs):
    """Scan one directory for movies and append them to *mediaList*.

    Args:
        path: Directory being scanned, as handed in by the caller.
        files: Paths of the files found in that directory.
        mediaList: Output list; movies found here are appended to it.
        subdirs: Sub-directories still to be visited. Entries that must not be
            entered (DVD internals, stacked CD folders, TV shows) are removed
            in place.
        language: Unused here; accepted for the scanner calling convention.
        **kwargs: Unused here; accepted for forward compatibility.
    """
    # Scan for video files.
    VideoFiles.Scan(path, files, mediaList, subdirs)

    # Check for DVD rips.
    paths = Utils.SplitPath(path)
    video_ts = ContainsFile(files, 'video_ts.ifo')
    if video_ts is None:
        video_ts = ContainsFile(files, 'video_ts.bup')

    if len(paths) >= 1 and len(paths[0]) > 0 and video_ts is not None:
        print("Found a DVD")
        name = year = None

        # Now find the name.
        if paths[-1].lower() == 'video_ts' and len(paths) >= 2:
            # Easiest case.
            (name, year) = VideoFiles.CleanName(paths[-2])
        else:
            # Work up until we find a viable candidate. reversed() leaves
            # `paths` untouched, so paths[0] below really is the topmost path.
            for p in reversed(paths):
                if re.match(nice_match, p):
                    (name, year) = VideoFiles.CleanName(p)
                    break

        if name is None:
            # Use the topmost path.
            (name, year) = VideoFiles.CleanName(paths[0])

        movie = Media.Movie(name, year)

        # Add the video_ts file first.
        movie.parts.append(video_ts)

        biggestFile = None
        biggestSize = 0
        for i in files:
            if os.path.splitext(i)[1].lower() == '.vob':
                size = os.path.getsize(i)
                if size > biggestSize:
                    biggestSize = size
                    biggestFile = i

        # Add the biggest part so that we can get thumbnail/art/analysis from it.
        if biggestFile is not None:
            movie.parts.append(biggestFile)

        if len(movie.parts) > 0:
            movie.guid = checkNfoFile(movie.parts[0], 1)
            mediaList.append(movie)

    # Check for Bluray rips.
    elif len(paths) >= 3 and paths[-1].lower() == 'stream' and paths[-2].lower() == 'bdmv':
        (name, year) = VideoFiles.CleanName(paths[-3])
        movie = Media.Movie(name, year)
        for i in files:
            movie.parts.append(i)
        mediaList.append(movie)

    else:
        # Make movies!
        for i in files:
            fileName = os.path.basename(i)
            (name, year) = VideoFiles.CleanName(os.path.splitext(fileName)[0])

            # If it matches a TV show, don't scan it as a movie.
            tv = False
            for rx in SeriesScanner.episode_regexps[0:-1]:
                if re.match(rx, name):
                    print("The file %s looked like a TV show so we're skipping it ( %s )" % (fileName, rx))
                    tv = True
                    break  # one match is enough

            if tv == False:
                # OK, it's a movie
                movie = Media.Movie(name, year)
                movie.source = VideoFiles.RetrieveSource(fileName)
                movie.parts.append(i)
                mediaList.append(movie)

        # NOTE(review): the folder-based steps below are kept inside this
        # branch because a DVD/Bluray movie would otherwise be renamed after
        # its VIDEO_TS/STREAM folder -- confirm against the upstream file.

        # Stack the results.
        Stack.Scan(path, files, mediaList, subdirs)

        # Clean the folder name and try a match on the folder.
        if len(path) > 0:
            # NOTE(review): the first replace() looks like a no-op as shown;
            # it may have lost a special character -- confirm upstream.
            folderName = os.path.basename(path).replace(' ', ' ').replace(' ','.')
            (cleanName, year) = VideoFiles.CleanName(folderName)
            if len(mediaList) == 1 and re.match(nice_match, cleanName):
                res = re.findall(nice_match, cleanName)
                mediaList[0].name = res[0][0]
                mediaList[0].year = res[0][1]
            elif len(mediaList) == 1 and (len(cleanName) > 1 or year is not None):
                mediaList[0].name = cleanName
                mediaList[0].year = year

        # Check for a folder with multiple 'CD' subfolders and massage
        cdDirPattern = re.compile(r'(?:cd|dvd|part|pt|disk|disc)[ \\.-]*([0-9]+)')
        # (outer number, inner number) -> (directory, found one level deeper?)
        # Numeric keys make CD 10 sort after CD 2.
        foundCDsubdirs = {}
        for s in subdirs:
            m = cdDirPattern.search(os.path.basename(s).lower())
            if not m:
                continue
            foundSubSubDirs = False
            for subsubdir in os.listdir(s):
                if os.path.isdir(os.path.join(s, subsubdir)):
                    subm = cdDirPattern.search(subsubdir.lower())
                    if subm:
                        foundSubSubDirs = True
                        key = (int(m.group(1)), int(subm.group(1)))
                        foundCDsubdirs[key] = (os.path.join(s, subsubdir), True)
            if foundSubSubDirs == False:
                foundCDsubdirs[(0, int(m.group(1)))] = (s, False)

        # More than one cd subdir, let's stack and whack subdirs.
        if len(foundCDsubdirs) > 1:
            name, year = VideoFiles.CleanName(os.path.basename(path))
            movie = Media.Movie(name, year)
            keys = sorted(foundCDsubdirs)
            # NOTE(review): checkNfoFile() looks in the directory *containing*
            # its argument, so this searches one level above the CD folder's
            # parent -- confirm that is intended.
            movie.guid = checkNfoFile(os.path.dirname(foundCDsubdirs[keys[0]][0]), 1)

            for key in keys:
                (d, nested) = foundCDsubdirs[key]
                subFiles = []
                for f in os.listdir(d):
                    subFiles.append(os.path.join(d, f))
                # presumably trims subFiles down to video files in place --
                # verify against VideoFiles.Scan
                VideoFiles.Scan(d, subFiles, mediaList, [])

                # Each entry remembers whether it came from a nested folder,
                # so the right directory is removed for that entry.
                if nested:
                    visited = os.path.dirname(d)
                else:
                    visited = d
                if visited in subdirs:
                    subdirs.remove(visited)
                movie.parts += subFiles

            if len(movie.parts) > 0:
                mediaList.append(movie)

    # See if we can find a GUID.
    for mediaItem in mediaList:
        if mediaItem.guid is None:
            mediaItem.guid = checkNfoFile(mediaItem.parts[0], len(mediaList))

    if len(mediaList) == 1:
        if mediaList[0].source is None:
            mediaList[0].source = VideoFiles.RetrieveSource(path)

    # If the subdirectories indicate that we're inside a DVD, when whack things other than audio and video.
    whack = []
    if 'video_ts' in [Utils.SplitPath(s)[-1].lower() for s in subdirs]:
        for subdir in subdirs:
            d = os.path.basename(subdir).lower()
            if d not in ['video_ts', 'audio_ts']:
                whack.append(subdir)

    # Finally, if any of the subdirectories match a TV show, don't enter!
    for subdir in subdirs:
        for rx in standalone_tv_regexs:
            res = re.findall(rx, subdir)
            if len(res):
                whack.append(subdir)

    for w in whack:
        # A directory can be listed twice (both rules above can select it).
        if w in subdirs:
            subdirs.remove(w)


def ContainsFile(files, file):
    """Return the first path in *files* whose base name equals *file*
    (case-insensitive), or None if there is none."""
    wanted = file.lower()
    for i in files:
        if os.path.basename(i).lower() == wanted:
            return i
    return None


def checkNfoFile(file, fileCount):
    """Return the first "tt<digits>" id found in an NFO file, or None.

    Args:
        file: Path of a media file; the NFO is looked up relative to it.
        fileCount: Number of media files in play. With exactly one, any
            ".nfo" file in the media file's directory is used; otherwise only
            the sidecar file (same name, ".nfo" extension) is used.
    """
    try:
        path = None

        # Depending on how many media files we have, check differently.
        if fileCount == 1:
            # Look for any NFO file.
            for f in os.listdir(os.path.dirname(file)):
                if f[-4:].lower() == '.nfo':
                    path = os.path.join(os.path.dirname(file), f)
                    break
        else:
            # Look for a sidecar NFO file.
            path = os.path.splitext(file)[0] + '.nfo'

        if path is not None and os.path.exists(path):
            nfo = open(path)
            try:
                nfoText = nfo.read()
            finally:
                nfo.close()  # do not leak the handle
            m = re.search('(tt[0-9]+)', nfoText)
            if m:
                return m.group(1)
    except Exception:
        # Best effort: an unreadable NFO must not abort the scan.
        print("Warning, couldn't read NFO file.")

    return None


# ---- File 2 of this gist revision: Stack scanner, initial version
# ---- (diff hunk @@ -0,0 +1,68 @@). It is a separate module on the page.
import Media, VideoFiles
import os.path, difflib

def compareFilenames(elem):
    """Return the sort key of a media item: the path of its first part."""
    return elem.parts[0]

def Scan(dir, files, mediaList, subdirs):
    """Combine multi-part movies in *mediaList* into single stacked items.

    Args:
        dir: Unused here; kept for the scanner calling convention.
        files: Unused here; kept for the scanner calling convention.
        mediaList: Media items to stack. Sorted and modified in place: parts
            of stacked items are appended to the first item of their stack and
            the other items are removed.
        subdirs: Unused here; kept for the scanner calling convention.
    """
    # Go through the files and see if any of them need to be stacked.
    stack_dict = {}
    stackDiffs = '123456789abcdefghijklmn' # These are the characters we are looking for being different across stackable filenames
    stackSuffixes = ['cd', 'dvd', 'part', 'pt', 'disk', 'disc']

    # Sort the mediaList by filename, so we can do our compares properly
    mediaList[:] = sorted(mediaList, key=compareFilenames)

    # Search for parts.
    for idx in range(len(mediaList) - 1):
        m1 = mediaList[idx]
        m2 = mediaList[idx + 1]
        f1 = os.path.basename(m1.parts[0])
        f2 = os.path.basename(m2.parts[0])

        opcodes = difflib.SequenceMatcher(None, f1, f2).get_opcodes()
        if len(opcodes) != 3:  # We only want one transform between two equal runs
            continue
        (tag, i1, i2, j1, j2) = opcodes[1]
        # The transform must replace exactly one character with one character.
        if tag != 'replace' or (i2 - i1) != 1 or (j2 - j1) != 1:
            continue
        if f1[i1:i2].lower() not in stackDiffs:
            continue

        root = f1[:i1]
        # check to see if this an x of y style stack, if so flag it
        xOfy = f1[i1 + 1:].lower().strip().startswith('of')

        # Fix cases where it is something like part 01 ... part 02 -- remove that 0, so the suffix check works later
        if root[-1:] == '0':
            root = root[:-1]

        # This is a special case for folders with multiple Volumes of a series (not a stacked movie) [e.g, Kill Bill Vol 1 / 2]
        stripped = root.strip()
        lowered = stripped.lower()
        if lowered.endswith('vol') or lowered.endswith('volume'):
            continue

        # Strip any suffixes like CD, DVD. The suffix is cut from the stripped
        # root so surrounding whitespace cannot shift the cut.
        foundSuffix = False
        for suffix in stackSuffixes:
            if lowered.endswith(suffix):
                root = stripped[0:-len(suffix)].strip()
                foundSuffix = True
                break

        if foundSuffix or xOfy:
            # Replace the name, which probably had the suffix.
            (name, year) = VideoFiles.CleanName(root)
            m1.name = name
            stack_dict.setdefault(root, [m1]).append(m2)

    # Now combine stacked parts
    for stack in stack_dict:
        for media in stack_dict[stack][1:]:
            stack_dict[stack][0].parts.append(media.parts[0])
            mediaList.remove(media)