Skip to content

Instantly share code, notes, and snippets.

@vofik
Created November 1, 2011 02:06

Revisions

  1. vofik revised this gist Nov 1, 2011. 1 changed file with 142 additions and 21 deletions.
    163 changes: 142 additions & 21 deletions Stack.py
    Original file line number Diff line number Diff line change
    @@ -1,68 +1,189 @@
    import Media, VideoFiles
    import os.path, difflib

    import re

    def compareFilenames(elem):
    return elem.parts[0]
    return elem.parts[0].lower()

    def Scan(dir, files, mediaList, subdirs):

    # Go through the files and see if any of them need to be stacked.
    stack_dict = {}
    stackDiffs = '123456789abcdefghijklmn' # These are the characters we are looking for being different across stackable filenames
    stackSuffixes = ['cd', 'dvd', 'part', 'pt', 'disk', 'disc']
    stackDiffs = r'[\da-n]' # These are the characters we are looking for being different across stackable filenames
    stackSuffixes = r'(?:cd|dvd|part|pt|disk|disc|scene)\.?(?:\d+)?$'
    scenePrefixes = r'(?:^scene.\d+|scene.\d+$)'

    # Sort the mediaList by filename, so we can do our compares properly
    mediaList[:] = sorted(mediaList, key=compareFilenames)

    # Search for parts.
    count = 0

    # check for monotonically increasing numeric or alphabetic filenames
    count = 0
    monotonicSeries = False
    for mediaItem in mediaList[:-1]:
    # if it didn't start as a monotonic series, it's not going to become one
    if count > 0 and monotonicSeries == False:
    break

    # if items were already stacked by other method, skip this attempt
    if hasattr(mediaItem, 'stacked') and mediaItem.stacked == True:
    continue

    m1 = mediaList[count]
    m2 = mediaList[count + 1]
    f1 = os.path.basename(os.path.splitext(m1.parts[0])[0]).strip().lower()
    f2 = os.path.basename(os.path.splitext(m2.parts[0])[0]).strip().lower()

    initialA = re.search(r'(^\d+)', f1)
    initialB = re.search(r'(^\d+)', f2)
    terminalA = re.search(r'(\d+)$', f1)
    terminalB = re.search(r'(\d+)$', f2)

    # if the filenames both start, or both end with a digit,
    # and the digit of the second filename is 1 larger than the one of the first filename, it's a series
    if(((initialA and initialB) and (int(initialA.group(0)) == int(initialB.group(0)) - 1)) or
    ((terminalA and terminalB) and (int(terminalA.group(0)) == int(terminalB.group(0)) - 1))):
    monotonicSeries = True

    # if the filenames both start, or both end with a letter,
    # and the letter seems to be the correct one for this iteration if we started from "a",
    # and the letter of the second filename is 1 larger than the one of the first filename, it's a series
    if(monotonicSeries == False):
    initialA = re.search(r'(^[a-y])', f1)
    initialB = re.search(r'(^[a-y])', f2)
    terminalA = re.search(r'([a-y])$', f1)
    terminalB = re.search(r'([a-y])$', f2)
    if(((initialA and initialB) and (ord(initialA.group(0)) == ord('a') + count and ord(initialA.group(0)) == ord(initialB.group(0)) - 1)) or
    ((terminalA and terminalB) and (ord(terminalA.group(0)) == ord('a') + count and ord(terminalA.group(0)) == ord(terminalB.group(0)) - 1))):
    monotonicSeries = True

    if monotonicSeries:

    m1.name = dir
    root = '_monotonic'

    m1.stacked = True
    if stack_dict.has_key(root):
    stack_dict[root].append(m2)
    # only mark the second item as stacked on last iteration, otherwise it'll break out of the loop in the start
    if count == len(mediaList) - 1:
    m2.stacked = True
    else:
    stack_dict[root] = [m1]
    stack_dict[root].append(m2)

    count += 1

    # group scene-based movie splits into a stack
    for mediaItem in mediaList:
    # if items were already stacked by other method, skip this attempt
    if hasattr(mediaItem, 'stacked') and mediaItem.stacked == True:
    continue

    f1 = os.path.basename(os.path.splitext(mediaItem.parts[0])[0]).lower()
    if re.match(scenePrefixes, f1):
    (name, year) = VideoFiles.CleanName(re.sub(scenePrefixes, '', f1))
    root = '_scene'
    mediaItem.name = name

    if stack_dict.has_key(root):
    stack_dict[root].append(mediaItem)
    mediaItem.stacked = True
    else:
    stack_dict[root] = [mediaItem]
    mediaItem.stacked = True

    # Search for prefix-based part names.
    count = 0
    for mediaItem in mediaList[:-1]:
    m1 = mediaList[count]
    m2 = mediaList[count + 1]

    # if items were already stacked by other method, skip this attempt
    if hasattr(m1, 'stacked') and m1.stacked == True:
    continue

    f1 = os.path.basename(m1.parts[0])
    f2 = os.path.basename(m2.parts[0])

    opcodes = difflib.SequenceMatcher(None, f1, f2).get_opcodes()

    if len(opcodes) == 3: # We only have one transform
    (tag, i1, i2, j1, j2) = opcodes[1]
    if tag == 'replace': # The transform is a replace
    if (i2-i1 == 1) and (j2-j1 == 1): # The transform is only one character
    if 1 in [c in f1[i1:i2].lower() for c in stackDiffs]: # That one character is 1-4 or a-d
    root = f1[:i1]
    if (i2-i1 <= 2) and (j2-j1 <= 2): # The transform is only one character
    if re.search(stackDiffs, f1[i1:i2].lower()): # That one character is 1-4 or a-n
    root = f1[:i1].strip(' _-')
    xOfy = False

    if f1[i1+1:].lower().strip().startswith('of'): #check to see if this an x of y style stack, if so flag it
    xOfy = True
    #prefix = f1[:i1] + f1[i2:]
    #(root, ext) = os.path.splitext(prefix)

    # Fix cases where it is something like part 01 ... part 02 -- remove that 0, so the suffix check works later
    if root[-1:] == '0':
    root = root[:-1]

    # This is a special case for folders with multiple Volumes of a series (not a stacked movie) [e.g, Kill Bill Vol 1 / 2]
    if not root.lower().strip().endswith('vol') and not root.lower().strip().endswith('volume'):

    # Strip any suffixes like CD, DVD.
    foundSuffix = False
    for suffix in stackSuffixes:
    if root.lower().strip().endswith(suffix):
    root = root[0:-len(suffix)].strip()
    foundSuffix = True
    break
    suffixMatch = re.search(stackSuffixes, root.lower().strip())

    if suffixMatch:
    root = root[0:-len(suffixMatch.group(0))].strip(' _-')
    foundSuffix = True

    if foundSuffix or xOfy:
    # Replace the name, which probably had the suffix.
    (name, year) = VideoFiles.CleanName(root)
    # pdb.set_trace()

    mediaItem.name = name
    m1.stacked = True
    if stack_dict.has_key(root):
    stack_dict[root].append(m2)
    # only mark the second item as stacked on last iteration, otherwise it'll break out of the loop in the start
    if count == len(mediaList) - 1:
    m2.stacked = True
    else:
    stack_dict[root] = [m1]
    stack_dict[root].append(m2)
    count += 1


    # combine stacks if possible
    count = 0
    stacks = stack_dict.keys()
    for stack in stacks[:-1]:
    s1 = stacks[count]
    s2 = stacks[count + 1]
    opcodes = difflib.SequenceMatcher(None, s1, s2).get_opcodes()

    if len(opcodes) == 2: # We only have one transform
    (tag, i1, i2, j1, j2) = opcodes[1]
    if tag == 'replace': # The transform is a replace
    if (i2-i1 == 1) and (j2-j1 == 1): # The transform is only one character
    if re.search(stackDiffs, s1): # That one character is 1-4 or a-n
    root = s1.lower().strip()
    suffixMatch = re.search(stackSuffixes, root)
    if suffixMatch:
    root = root[0:-len(suffixMatch.group(0))].strip(' -')

    (name, year) = VideoFiles.CleanName(root)

    # merge existing two stacks into new root
    for oldstack in [s1, s2]:
    for media in stack_dict[oldstack]:
    media.name = name

    if stack_dict.has_key(root):
    for media in stack_dict[oldstack]:
    stack_dict[root].append(media)
    else:
    stack_dict[root] = stack_dict[oldstack]
    del stack_dict[oldstack]

    count += 1

    # Now combine stacked parts
    for stack in stack_dict.keys():
    for media in stack_dict[stack][1:]:
    stack_dict[stack][0].parts.append(media.parts[0])
    mediaList.remove(media)
    mediaList.remove(media)
  2. vofik created this gist Nov 1, 2011.
    208 changes: 208 additions & 0 deletions Plex Movie Scanner.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,208 @@
    #
    # Copyright (c) 2010 Plex Development Team. All rights reserved.
    #
    import re, os, os.path
    import Media, VideoFiles, Stack, Utils

    SeriesScanner = __import__('Plex Series Scanner')

    # Matches a "Title (Year)" or "Title [Year]" name: group 1 is the title,
    # group 2 is a four-digit year starting with 1 or 2.
    # Raw strings keep the backslashes literal without relying on Python
    # passing unknown escape sequences through unchanged.
    nice_match = r'(.+) [\(\[]([1-2][0-9]{3})[\)\]]'
    # Patterns for names shaped like "Show (2010) - 1x02 - Title"; Scan() drops
    # any subdirectory whose path matches one of them.
    standalone_tv_regexs = [ r'(.*?)( \(([0-9]+)\))? - ([0-9])+x([0-9]+)(-[0-9]+[Xx]([0-9]+))? - (.*)' ]

    # Scans through files, and add to the media list.
    def Scan(path, files, mediaList, subdirs, language=None, **kwargs):
    # Builds mediaList for one directory: runs VideoFiles.Scan, then turns the
    # files into Media.Movie items (DVD rip, Blu-ray rip, or one movie per file),
    # stacks multi-part movies, looks for NFO-provided guids, and finally removes
    # entries from subdirs in place.
    # language and **kwargs are accepted but never read in this function.
    # NOTE(review): the indentation of this listing was lost, so the nesting of
    # the sections below cannot be confirmed from here -- verify against the
    # original file before restructuring anything.

    # Scan for video files.
    VideoFiles.Scan(path, files, mediaList, subdirs)

    # Check for DVD rips.
    paths = Utils.SplitPath(path)
    video_ts = ContainsFile(files, 'video_ts.ifo')
    if video_ts is None:
    video_ts = ContainsFile(files, 'video_ts.bup')

    if len(paths) >= 1 and len(paths[0]) > 0 and video_ts is not None:
    print "Found a DVD"
    name = year = None

    # Now find the name.
    if paths[-1].lower() == 'video_ts' and len(paths) >= 2:
    # Easiest case.
    (name, year) = VideoFiles.CleanName(paths[-2])
    else:
    # Work up until we find a viable candidate.
    # NOTE(review): backwardsPaths is the same object as paths, so (assuming
    # Utils.SplitPath returns a list) reverse() reverses paths as well, and the
    # paths[0] fallback below then reads the reversed order -- confirm intent.
    backwardsPaths = paths
    backwardsPaths.reverse()
    for p in backwardsPaths:
    if re.match(nice_match, p):
    (name, year) = VideoFiles.CleanName(p)
    break

    if name is None:
    # Use the topmost path.
    (name, year) = VideoFiles.CleanName(paths[0])

    movie = Media.Movie(name, year)

    # Add the video_ts file first.
    movie.parts.append(video_ts)

    biggestFile = None
    biggestSize = 0

    # Pick the largest .vob among files (size read with os.path.getsize).
    for i in files:
    if os.path.splitext(i)[1].lower() == '.vob' and os.path.getsize(i) > biggestSize:
    biggestSize = os.path.getsize(i)
    biggestFile = i

    # Add the biggest part so that we can get thumbnail/art/analysis from it.
    if biggestFile is not None:
    movie.parts.append(biggestFile)

    if len(movie.parts) > 0:
    movie.guid = checkNfoFile(movie.parts[0], 1)
    mediaList.append(movie)

    # Check for Bluray rips.
    elif len(paths) >= 3 and paths[-1].lower() == 'stream' and paths[-2].lower() == 'bdmv':
    (name, year) = VideoFiles.CleanName(paths[-3])
    movie = Media.Movie(name, year)
    for i in files:
    movie.parts.append(i)
    mediaList.append(movie)

    else:

    # Make movies!
    for i in files:
    file = os.path.basename(i)
    (name, year) = VideoFiles.CleanName(os.path.splitext(file)[0])

    # If it matches a TV show, don't scan it as a movie.
    # The slice [0:-1] tries every series pattern except the last one.
    tv = False
    for rx in SeriesScanner.episode_regexps[0:-1]:
    if re.match(rx, name):
    print "The file", file, "looked like a TV show so we're skipping it (", rx, ")"
    tv = True

    if tv == False:
    # OK, it's a movie
    movie = Media.Movie(name, year)
    movie.source = VideoFiles.RetrieveSource(file)
    movie.parts.append(i)
    mediaList.append(movie)

    # Stack the results.
    Stack.Scan(path, files, mediaList, subdirs)

    # Clean the folder name and try a match on the folder.
    if len(path) > 0:
    # NOTE(review): as rendered here the first replace() swaps a space for a
    # space (a no-op); whitespace may have been collapsed in this copy -- check
    # the original file.
    folderName = os.path.basename(path).replace(' ', ' ').replace(' ','.')
    (cleanName, year) = VideoFiles.CleanName(folderName)
    if len(mediaList) == 1 and re.match(nice_match, cleanName):
    res = re.findall(nice_match, cleanName)
    mediaList[0].name = res[0][0]
    mediaList[0].year = res[0][1]
    elif len(mediaList) == 1 and (len(cleanName) > 1 or year is not None):
    mediaList[0].name = cleanName
    mediaList[0].year = year

    # Check for a folder with multiple 'CD' subfolders and massage
    # foundCDsubdirs maps '<outer number>-<inner number>' (or '0-<number>' when a
    # CD folder has no numbered sub-subfolders) to the directory holding the parts.
    foundCDsubdirs = {}
    for s in subdirs:
    m = re.search(r'(?:cd|dvd|part|pt|disk|disc)[ \\.-]*([0-9]+)', os.path.basename(s).lower())
    if m:
    foundSubSubDirs = False
    for subsubdir in os.listdir(s):
    if os.path.isdir(os.path.join(s, subsubdir)):
    subm = re.search(r'(?:cd|dvd|part|pt|disk|disc)[ \\.-]*([0-9]+)', subsubdir.lower())
    if subm:
    foundSubSubDirs = True
    # groups(1)[0] is the first captured group, i.e. the matched number.
    foundCDsubdirs[m.groups(1)[0] + '-' + subm.groups(1)[0]] = os.path.join(s, subsubdir)

    if foundSubSubDirs == False:
    foundCDsubdirs['0-' + m.groups(1)[0]] = s

    # More than one cd subdir, let's stack and whack subdirs.
    if len(foundCDsubdirs) > 1:
    name, year = VideoFiles.CleanName(os.path.basename(path))
    movie = Media.Movie(name, year)
    # Indexing .values() relies on it returning a list (Python 2 behaviour).
    movie.guid = checkNfoFile(os.path.dirname(foundCDsubdirs.values()[0]), 1)

    # Keys are sorted as strings, so the parts are added in lexicographic key order.
    keys = foundCDsubdirs.keys()
    keys.sort()
    for key in keys:
    d = foundCDsubdirs[key]
    subFiles = []
    for f in os.listdir(d):
    subFiles.append(os.path.join(d,f))
    VideoFiles.Scan(d, subFiles, mediaList, [])

    # foundSubSubDirs still holds whatever the last matching subdir in the loop
    # above left in it; it is not recomputed per key.
    if foundSubSubDirs:
    if os.path.dirname(d) in subdirs:
    subdirs.remove(os.path.dirname(d))
    else:
    subdirs.remove(d)

    movie.parts += subFiles

    if len(movie.parts) > 0:
    mediaList.append(movie)

    # See if we can find a GUID.
    for mediaItem in mediaList:
    if mediaItem.guid is None:
    mediaItem.guid = checkNfoFile(mediaItem.parts[0], len(mediaList))

    if len(mediaList) == 1:
    if mediaList[0].source is None:
    mediaList[0].source = VideoFiles.RetrieveSource(path)

    # If the subdirectories indicate that we're inside a DVD, then whack things other than audio and video.
    whack = []
    if 'video_ts' in [Utils.SplitPath(s)[-1].lower() for s in subdirs]:
    for dir in subdirs:
    d = os.path.basename(dir).lower()
    if d not in ['video_ts', 'audio_ts']:
    whack.append(dir)

    # Finally, if any of the subdirectories match a TV show, don't enter!
    for dir in subdirs:
    for rx in standalone_tv_regexs:
    res = re.findall(rx, dir)
    if len(res):
    whack.append(dir)

    # NOTE(review): list.remove raises ValueError if the same directory was
    # appended to whack more than once by the loops above -- confirm whether
    # that can happen in practice.
    for w in whack:
    subdirs.remove(w)

    def ContainsFile(files, file):
        """Return the first entry of files whose basename equals file, ignoring case.

        files -- iterable of path strings to search.
        file  -- bare file name to look for (e.g. 'video_ts.ifo').

        Returns the matching path exactly as it appears in files, or None when
        nothing matches (including when files is empty).
        """
        wanted = file.lower()  # Loop-invariant, so lower-case it only once.
        for candidate in files:
            if os.path.basename(candidate).lower() == wanted:
                return candidate
        return None

    def checkNfoFile(file, fileCount):
        """Search an NFO file near a media file for an id of the form 'tt' + digits.

        file      -- path of the media file (or any path inside the folder to search).
        fileCount -- number of media items found alongside it. When it is 1, the
                     first '.nfo' entry listed in file's directory is used;
                     otherwise only the sidecar NFO (file's path with its
                     extension replaced by '.nfo') is considered.

        Returns the first 'tt<digits>' match in the NFO text (presumably an IMDb
        id -- callers store it as the item's guid), or None when there is no NFO,
        it contains no such id, or reading it fails. Failures are reported with a
        printed warning instead of being raised, so one unreadable NFO does not
        abort the scan.
        """
        try:
            path = None

            # Depending on how many media files we have, check differently.
            if fileCount == 1:
                # Look for any NFO file.
                folder = os.path.dirname(file)
                for entry in os.listdir(folder):
                    if entry[-4:].lower() == '.nfo':
                        path = os.path.join(folder, entry)
                        break
            else:
                # Look for a sidecar NFO file.
                path = os.path.splitext(file)[0] + '.nfo'

            if path is not None and os.path.exists(path):
                # Close the handle explicitly instead of leaving it to the
                # garbage collector (try/finally also works on old Python 2).
                nfo = open(path)
                try:
                    nfoText = nfo.read()
                finally:
                    nfo.close()

                m = re.search('(tt[0-9]+)', nfoText)
                if m:
                    return m.group(1)
        except Exception:
            # Deliberately best-effort, but catching Exception rather than using a
            # bare except lets KeyboardInterrupt and SystemExit propagate.
            print("Warning, couldn't read NFO file.")

        return None
    68 changes: 68 additions & 0 deletions Stack.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,68 @@
    import Media, VideoFiles
    import os.path, difflib

    def compareFilenames(elem):
        """Sort key for media items: the item's first part.

        elem -- an object with a parts sequence; only parts[0] is read.

        Returns parts[0] unchanged, so ordering is case-sensitive.
        """
        return elem.parts[0]

    def Scan(dir, files, mediaList, subdirs):
        """Merge multi-part movie files (e.g. "cd1"/"cd2", "part a"/"part b", "1 of 2") into single items.

        dir, files, subdirs -- accepted for the scanner call signature; not read here.
        mediaList -- list of media items, each with a parts list (parts[0] is the
                     file path) and a name. It is sorted in place by first part and
                     then reduced in place: every additional part of a stack is
                     appended to the first item's parts and removed from the list.

        Two neighbouring file names are stacked when they differ in exactly one
        character taken from stackDiffs, the text before that character does not
        end in "vol"/"volume", and either that text ends in one of stackSuffixes
        or the differing character is followed by "of".

        Returns None.
        """
        # Go through the files and see if any of them need to be stacked.
        stack_dict = {}
        stackDiffs = '123456789abcdefghijklmn' # These are the characters we are looking for being different across stackable filenames
        stackSuffixes = ['cd', 'dvd', 'part', 'pt', 'disk', 'disc']

        # Sort the mediaList by filename, so we can do our compares properly
        mediaList[:] = sorted(mediaList, key=compareFilenames)

        # Search for parts by comparing every item with its successor.
        for m1, m2 in zip(mediaList[:-1], mediaList[1:]):
            f1 = os.path.basename(m1.parts[0])
            f2 = os.path.basename(m2.parts[0])

            opcodes = difflib.SequenceMatcher(None, f1, f2).get_opcodes()
            if len(opcodes) != 3: # We want exactly: equal / one transform / equal
                continue

            (tag, i1, i2, j1, j2) = opcodes[1]
            if tag != 'replace': # The transform must be a replace
                continue
            if (i2 - i1 != 1) or (j2 - j1 != 1): # ... of exactly one character
                continue
            if f1[i1:i2].lower() not in stackDiffs: # ... and that character is a part marker
                continue

            root = f1[:i1]

            # Check to see if this an x of y style stack, if so flag it
            xOfy = f1[i1+1:].lower().strip().startswith('of')

            # Fix cases where it is something like part 01 ... part 02 -- remove that 0, so the suffix check works later
            if root[-1:] == '0':
                root = root[:-1]

            # This is a special case for folders with multiple Volumes of a series (not a stacked movie) [e.g, Kill Bill Vol 1 / 2]
            lowered = root.lower().strip()
            if lowered.endswith('vol') or lowered.endswith('volume'):
                continue

            # Strip any suffixes like CD, DVD. The suffix is cut from the stripped
            # root: slicing the unstripped text would count trailing spaces as part
            # of the suffix and turn "Movie CD " into "Movie C".
            foundSuffix = False
            stripped = root.strip()
            for suffix in stackSuffixes:
                if stripped.lower().endswith(suffix):
                    root = stripped[:-len(suffix)].strip()
                    foundSuffix = True
                    break

            if foundSuffix or xOfy:
                # Replace the name, which probably had the suffix.
                (name, year) = VideoFiles.CleanName(root)
                m1.name = name
                if root in stack_dict:
                    stack_dict[root].append(m2)
                else:
                    stack_dict[root] = [m1, m2]

        # Now combine stacked parts
        for stacked in stack_dict.values():
            first = stacked[0]
            for media in stacked[1:]:
                first.parts.append(media.parts[0])
                mediaList.remove(media)