#!/usr/bin/env python
# coding: utf-8
from __future__ import unicode_literals
from collections import defaultdict
from steve.util import (
    #get_from_config,
    get_project_config,
    save_json_files,
    #load_json_files,
)
import json
import internetarchive

def files_by_format(item):
    """Group the files of *item* by their ``format`` attribute.

    Returns a ``defaultdict(list)`` mapping each format to the list of
    file objects yielded by ``item.iter_files()``, in iteration order.
    """
    grouped = defaultdict(list)
    for file_obj in item.iter_files():
        bucket = grouped[file_obj.format]
        bucket.append(file_obj)
    return grouped

def get_format_url(files_lookup, fmt):
    """Return the URL of the first file stored under *fmt*.

    *files_lookup* maps a format to a list of file objects. When the
    format is absent or its list is empty, an empty string is returned.
    """
    candidates = files_lookup.get(fmt)
    return candidates[0].url if candidates else ''

def subject2tags(metadata):
    """Convert the ``subject`` metadata field into a list of tags.

    The field may be a single string (tags separated by ``;``) or a
    list/tuple of such strings, which is how a repeated metadata field
    is delivered. Each tag is stripped of surrounding whitespace and
    empty tags (e.g. from a trailing ``;``) are dropped.

    Returns an empty list when the field is missing or empty.
    """
    subject = metadata.get('subject')
    if not subject:
        return []
    # Normalise to a list so single and repeated values share one path.
    if isinstance(subject, (list, tuple)):
        entries = subject
    else:
        entries = [subject]
    tags = []
    for entry in entries:
        for raw_tag in entry.split(';'):
            tag = raw_tag.strip()
            if tag:
                tags.append(tag)
    return tags

def creator2speakers(metadata):
    """Return the ``creator`` metadata field as a flat list of speakers.

    A single string value becomes a one-element list. A list/tuple
    value (a repeated metadata field) is returned as a flat list rather
    than being nested inside another list. A missing or empty field
    gives an empty list.
    """
    creator = metadata.get('creator')
    if not creator:
        return []
    if isinstance(creator, (list, tuple)):
        # Copy so callers cannot mutate the metadata's own list.
        return list(creator)
    return [creator]

def language2language(metadata):
    """Return the ``language`` entry of *metadata* unchanged.

    Raises KeyError when the entry is missing.
    """
    # TODO: look up 3 letter language codes instead of passing through.
    language = metadata['language']
    return language

def item2source_url(item):
    """Build the archive.org details URL for *item*.

    Combines ``item.protocol`` and ``item.identifier``; the protocol
    value is placed directly before ``//``.
    """
    template = '{}//archive.org/details/{}'
    scheme, ident = item.protocol, item.identifier
    return template.format(scheme, ident)

def item2video(item, category, language):
    """Build a video record dict from an archive item.

    Returns an empty dict when ``item.exists`` is false. Otherwise the
    record is filled from ``item.metadata`` together with the given
    *category* and *language*, plus the URLs of the item's thumbnail,
    Ogg Video and MPEG4 files (empty string when a format is absent).
    The title is read by plain indexing, so a missing ``title`` raises
    KeyError.
    """
    if not item.exists:
        return {}

    meta = item.metadata
    # The description is used for both 'description' and 'summary'.
    record = {
        'category': category,
        'state': 2,
        'title': meta['title'],
        'description': meta.get('description', ''),
        'summary': meta.get('description', ''),
        'tags': subject2tags(meta),
        'speakers': creator2speakers(meta),
        'language': language,
        'copyright_text': meta.get('licenseurl', ''),
        'recorded': meta.get('date', ''),
        'whiteboard': 'ia scrape',
        'source_url': item2source_url(item),
    }

    by_format = files_by_format(item)
    # A list of pairs keeps the key insertion order explicit.
    record.update([
        ('thumbnail_url', get_format_url(by_format, 'Thumbnail')),
        ('video_ogv_url', get_format_url(by_format, 'Ogg Video')),
        ('video_ogv_download_only', False),
        ('video_mp4_url', get_format_url(by_format, 'MPEG4')),
        ('video_mp4_download_only', False),
        ('video_webm_download_only', False),
        ('video_webm_url', ''),
        ('video_flv_download_only', False),
        ('video_flv_url', ''),
    ])
    return record


if __name__ == "__main__":
    # Collect every archive item matching the search query, convert the
    # ones that exist into video records and write them out as JSON
    # files through steve's save_json_files.
    project_cfg = get_project_config()

    hits = internetarchive.search_items('subject:pyconza2014')
    found_ids = [hit['identifier'] for hit in hits]

    json_files = []
    for ia_id in found_ids:
        ia_item = internetarchive.Item(ia_id)
        record = item2video(ia_item, 'PyCon ZA 2014', 'English')
        if not ia_item.exists:
            continue
        target = 'json/{}.json'.format(ia_id)
        json_files.append((target, record))

    save_json_files(project_cfg, json_files)