#!/usr/bin/env python3

from base64 import b64decode
import hashlib
from lxml import etree
from io import BytesIO
import os
from time import strptime

from pypandoc import convert_text

# Reference:
# http://www.hanxiaogang.com/writing/parsing-evernote-export-file-enex-using-python/
# Module-level lxml parser configured to drop blank text nodes and to leave
# entity references unresolved.
# NOTE(review): `p` is not referenced by any code visible in this file
# (parseNoteXML builds its own iterparse context) -- confirm before removing.
p = etree.XMLParser(remove_blank_text=True, resolve_entities=False)

def parse_content(content):
    """Render a note body as Org-mode markup.

    Hands ``content`` to pypandoc's ``convert_text``, declaring the input
    format as ``'html'`` and requesting ``'org'`` output, and returns the
    converted text unchanged.
    """
    return convert_text(content, 'org', format='html')

def parse_resource(resource):
    """Flatten a ``<resource>`` element into a dict keyed by child tag.

    Every child is stored as ``tag -> text``, except ``data``: its base64
    text is decoded to bytes and stored under ``'data'``, and the MD5 hex
    digest of those bytes is stored under ``'hash'``.
    """
    fields = {}
    for child in resource:
        tag = child.tag
        if tag != 'data':
            fields[tag] = child.text
            continue
        # The text of <data> is sometimes None; treat that as an empty payload.
        raw = child.text
        payload = b64decode(raw) if raw else b''
        fields[tag] = payload
        fields['hash'] = hashlib.md5(payload).hexdigest()
    return fields

def parse_note(note):
    """Turn a ``<note>`` element into a dict keyed by child tag.

    Child handling:
      * ``content``  -> converted text under ``'content'`` and the untouched
        original under ``'content-raw'``.
      * ``resource`` -> parsed with ``parse_resource`` and collected into the
        list stored under ``'resource'`` (always present, possibly empty).
      * ``created`` / ``updated`` -> ``time.struct_time`` parsed from the
        ``YYYYMMDDTHHMMSSZ`` timestamp text.
      * anything else -> the child's text as-is (a repeated tag keeps only
        its last value).
    """
    attachments = []
    fields = {}
    for child in note:
        tag, text = child.tag, child.text
        if tag == 'resource':
            attachments.append(parse_resource(child))
        elif tag in ('created', 'updated'):
            fields[tag] = strptime(text, '%Y%m%dT%H%M%SZ')
        elif tag == 'content':
            fields[tag] = parse_content(text)
            # Keep the unconverted markup alongside the converted text.
            fields['content-raw'] = text
        else:
            fields[tag] = text

    fields['resource'] = attachments
    return fields

def parseNoteXML(xmlFile):
    """Lazily parse an export file, yielding one dict per ``<note>`` element.

    Args:
        xmlFile: source handed straight to ``lxml.etree.iterparse`` (the
            script passes a file name).

    Yields:
        The dict built by ``parse_note`` for each ``<note>`` element, in
        document order.

    Each note's element is released once the consumer asks for the next
    note, so memory use stays proportional to a single note instead of
    growing with the whole export.
    """
    # Without huge_tree set to True, parser may complain about huge text node
    # Try to recover, because there may be "&nbsp;", which will cause
    # "XMLSyntaxError: Entity 'nbsp' not defined"
    context = etree.iterparse(
        xmlFile,
        encoding='utf-8',
        strip_cdata=False,
        huge_tree=True,
        recover=True,
    )
    for _event, elem in context:
        if elem.tag != "note":
            continue
        yield parse_note(elem)
        # parse_note returns only plain Python values (str, bytes,
        # struct_time), never references into the tree, so the subtree can
        # be dropped. Without this, iterparse keeps every processed note
        # (including its base64 attachments) alive until parsing finishes.
        elem.clear()
        parent = elem.getparent()
        if parent is not None:
            # Also detach the already-emptied notes that precede this one so
            # the root does not accumulate empty children.
            while elem.getprevious() is not None:
                del parent[0]
# Save notes and attachments
# in directories named according to date of creation
def export_note(note):
    """Write one parsed note and its attachments under ``en-export/``.

    Files are placed in ``en-export/<year>/<month>/<day>/`` according to the
    note's ``'created'`` time:
      * ``<title>.org``  -- ``#+TITLE:`` line followed by the converted text
      * ``<title>.bak``  -- the original, unconverted content
      * ``<hash>.data``  -- one binary file per attachment

    Args:
        note: dict as produced by ``parse_note``. ``'created'`` must be a
            ``time.struct_time``; ``'title'``, ``'content'``,
            ``'content-raw'`` and ``'resource'`` may be missing or ``None``.

    Text files are always written as UTF-8 rather than in the platform's
    default encoding, so non-ASCII notes export identically everywhere.

    NOTE: the file name is the first 20 characters of the title, so notes
    created on the same day whose titles share that prefix overwrite each
    other.
    """
    date = note['created']
    note_dir = os.path.join(
        'en-export',
        str(date.tm_year),
        '%02d' % date.tm_mon,
        '%02d' % date.tm_mday,
    )
    os.makedirs(note_dir, exist_ok=True)

    full_title = note.get('title') or ''
    # Remove "/" from filenames; fall back to a fixed stem so an empty title
    # does not produce a bare ".org" / ".bak" file.
    title = full_title.replace('/', ' ')[:20] or 'untitled'

    text_file = os.path.join(note_dir, title + '.org')
    with open(text_file, 'w', encoding='utf-8') as fd:
        # Write the original title
        fd.write('#+TITLE: ' + full_title + '\n')
        fd.write(note.get('content') or '')

    bak_file = os.path.join(note_dir, title + '.bak')
    with open(bak_file, 'w', encoding='utf-8') as fd:
        fd.write(note.get('content-raw') or '')

    for resource in note.get('resource') or ():
        # A resource without a payload has neither 'data' nor 'hash';
        # there is nothing to write for it.
        if 'data' not in resource or 'hash' not in resource:
            continue
        rsc_file = os.path.join(note_dir, resource['hash'] + '.data')
        with open(rsc_file, 'wb') as fd:
            fd.write(resource['data'])
if __name__ == '__main__':
    import sys

    # Optional first command-line argument selects the export file to read;
    # with no argument the script reads 'mynote.enex' as before.
    enex_path = sys.argv[1] if len(sys.argv) > 1 else 'mynote.enex'
    for note in parseNoteXML(enex_path):
        export_note(note)