import re
from hashlib import md5

def gfm(text):
    # Extract pre blocks.
    extractions = {}
    def pre_extraction_callback(matchobj):
        digest = md5(matchobj.group(0)).hexdigest()
        extractions[digest] = matchobj.group(0)
        return "{gfm-extraction-%s}" % digest
    pattern = re.compile(r'<pre>.*?</pre>', re.MULTILINE | re.DOTALL)
    text = re.sub(pattern, pre_extraction_callback, text)

    # Prevent foo_bar_baz from ending up with an italic word in the middle.
    def italic_callback(matchobj):
        s = matchobj.group(0)
        if list(s).count('_') >= 2:
            return s.replace('_', '\_')
        return s
    text = re.sub(r'^(?! {4}|\t)\w+_\w+_\w[\w_]*', italic_callback, text)

    # In very clear cases, let newlines become <br /> tags.
    def newline_callback(matchobj):
        if len(matchobj.group(1)) == 1:
            return matchobj.group(0).rstrip() + '  \n'
        else:
            return matchobj.group(0)
    pattern = re.compile(r'^[\w\<][^\n]*(\n+)', re.MULTILINE)
    text = re.sub(pattern, newline_callback, text)

    # Insert pre block extractions.
    def pre_insert_callback(matchobj):
        return '\n\n' + extractions[matchobj.group(1)]
    text = re.sub(r'{gfm-extraction-([0-9a-f]{32})\}', pre_insert_callback, text)

    return text


# Test suite.
try:
    from nose.tools import assert_equal
except ImportError:
    def assert_equal(a, b):
        assert a == b, '%r != %r' % (a, b)

def test_single_underscores():
    """Don't touch single underscores inside words."""
    assert_equal(
        gfm('foo_bar'),
        'foo_bar',
    )

def test_underscores_code_blocks():
    """Don't touch underscores in code blocks."""
    assert_equal(
        gfm('    foo_bar_baz'),
        '    foo_bar_baz',
    )

def test_underscores_pre_blocks():
    """Don't touch underscores in pre blocks."""
    assert_equal(
        gfm('<pre>\nfoo_bar_baz\n</pre>'),
        '\n\n<pre>\nfoo_bar_baz\n</pre>',
    )

def test_pre_block_pre_text():
    """Don't treat pre blocks with pre-text differently."""
    a = '\n\n<pre>\nthis is `a\\_test` and this\\_too\n</pre>'
    b = 'hmm<pre>\nthis is `a\\_test` and this\\_too\n</pre>'
    assert_equal(
        gfm(a)[2:],
        gfm(b)[3:],
    )

def test_two_underscores():
    """Escape two or more underscores inside words."""
    assert_equal(
        gfm('foo_bar_baz'),
        'foo\\_bar\\_baz',
    )

def test_newlines_simple():
    """Turn newlines into br tags in simple cases."""
    assert_equal(
        gfm('foo\nbar'),
        'foo  \nbar',
    )

def test_newlines_group():
    """Convert newlines in all groups."""
    assert_equal(
        gfm('apple\npear\norange\n\nruby\npython\nerlang'),
        'apple  \npear  \norange\n\nruby  \npython  \nerlang',
    )

def test_newlines_long_group():
    """Convert newlines in even long groups."""
    assert_equal(
        gfm('apple\npear\norange\nbanana\n\nruby\npython\nerlang'),
        'apple  \npear  \norange  \nbanana\n\nruby  \npython  \nerlang',
    )

def test_newlines_list():
    """Don't convert newlines in lists."""
    assert_equal(
        gfm('# foo\n# bar'),
        '# foo\n# bar',
    )
    assert_equal(
        gfm('* foo\n* bar'),
        '* foo\n* bar',
    )