Skip to content

Instantly share code, notes, and snippets.

@skywodd
Created July 26, 2017 19:53

Revisions

  1. skywodd created this gist Jul 26, 2017.
    155 changes: 155 additions & 0 deletions unicoder.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,155 @@
    """
    Unicoder - The unicode nightmare for developers.
    """

    import os
    import sys
    import random
    import codecs
    import argparse
    import unicodedata

    from collections import defaultdict

    from PIL import ImageFont


    # Package information
    # (__version__ and __copyright__ are also displayed by the ``--version``
    # command line flag declared in the main entry point of this script)
    __author__ = "Fabien Batteix (@skywodd)"
    __copyright__ = "Copyright 2017 TamiaLab"
    __credits__ = ["Fabien Batteix", "TamiaLab"]
    __license__ = "GPLv3"
    __version__ = "1.0.0"
    __maintainer__ = "Fabien Batteix"
    __email__ = "fabien.batteix@tamialab.fr"
    __status__ = "Production" # *wink*


    def verbose_print(verbose, *args, **kwargs):
        """
        Forward the given arguments to ``print`` when verbosity is enabled.

        :param verbose: when falsy, nothing is printed at all.
        :param args: positional arguments handed to ``print`` unchanged.
        :param kwargs: keyword arguments handed to ``print`` unchanged.
        """
        if not verbose:
            return
        print(*args, **kwargs)


    def build_lookalike_map(font='cour.ttf', font_size=20):
        """
        Group unicode characters by the bitmap they produce in a given font.

        Every code point from 0 up to ``sys.maxunicode`` is examined. Only the
        characters whose unicode general category is one of the symbol,
        punctuation, number and letter categories listed below are kept. The
        bitmap returned by ``ImageFont.getmask`` for each kept character is
        turned into ``bytes`` and used as its "footprint".

        :param font: font file handed to ``ImageFont.truetype``.
        :param font_size: font size handed to ``ImageFont.truetype``.
        :return: a ``(footprint_lut, lookalike_lut)`` tuple, where
            ``footprint_lut`` maps a character to its footprint and
            ``lookalike_lut`` maps a footprint to the list of characters
            sharing it (in increasing code point order).
        """
        eligible_categories = frozenset((
            'So', 'Sm', 'Sc',
            'Ps', 'Po', 'Pi',
            'Pf', 'Pe', 'Pd',
            'No', 'Nl', 'Nd',
            'Lu', 'Lt', 'Lo', 'Ll',
        ))
        by_char = {}
        by_footprint = defaultdict(list)
        typeface = ImageFont.truetype(font, font_size)

        for codepoint in range(sys.maxunicode + 1):
            char = chr(codepoint)
            if unicodedata.category(char) in eligible_categories:
                # Characters producing the same bytes are considered look-alike
                glyph = bytes(typeface.getmask(char, mode='1'))
                by_footprint[glyph].append(char)
                by_char[char] = glyph

        return by_char, by_footprint


    def lookalike_replacement(c, lut, shuffle):
        """
        Pick the look-alike character to use in place of ``c``.

        :param c: the character to replace.
        :param lut: the ``(footprint_lut, lookalike_lut)`` pair of mappings
            (character -> footprint, footprint -> list of characters).
        :param shuffle: when truthy, the replacement is drawn at random among
            all characters sharing the footprint (``c`` itself included).
            Otherwise the first listed character different from ``c`` is used.
        :return: the replacement, or ``c`` itself when it has no footprint or
            no other character shares its footprint.
        """
        by_char, by_footprint = lut

        key = by_char.get(c)
        if key is None:
            # Unknown character, leave it untouched
            return c

        candidates = by_footprint[key]
        if shuffle:
            return random.choice(candidates)

        # First candidate which is not the character itself, if any
        return next((other for other in candidates if other != c), c)


    def unicoder_file(path, lut, verbose=False, shuffle=False):
        """
        Replace in place every character of a file by a unicode look-alike.

        The file is read and written back as UTF-8. A missing path or a
        directory is reported on ``sys.stderr`` and skipped.

        :param path: path of the file to process.
        :param lut: the ``(footprint_lut, lookalike_lut)`` pair of mappings
            handed to ``lookalike_replacement``.
        :param verbose: currently unused, kept for interface compatibility.
        :param shuffle: handed to ``lookalike_replacement``; when truthy the
            replacement characters are picked at random.
        """

        # Test if file exist first
        if not os.path.exists(path):
            print('File not found "{}"'.format(path), file=sys.stderr)
            return

        # Special case for directories
        if os.path.isdir(path):
            print('Skipping directory "{}"'.format(path), file=sys.stderr)
            return

        # newline='' disables any newline translation, so line endings are
        # written back exactly as they were read.
        with open(path, 'r+', encoding='utf8', newline='') as f:
            data = f.read()
            garbled = ''.join(lookalike_replacement(c, lut, shuffle) for c in data)
            f.seek(0)
            f.write(garbled)
            # A replacement can take fewer UTF-8 bytes than the original
            # character. Without truncating, the tail of the old content would
            # be left behind after the new content.
            f.truncate()


    # Main entry point
    if __name__ == '__main__':

        # Arguments parser
        parser = argparse.ArgumentParser(
            description='Turn a source code (or any text) file(s) into garbage using unicode look-alike characters.',
            epilog='The author of this program IS NOT responsible for any damage made with it.'
        )
        parser.add_argument('--version', action='version',
                            version='Unicoder {} {}'.format(__version__, __copyright__))
        parser.add_argument('paths', metavar='FILE', nargs='+',
                            help='File(s) path(s) to be processed.')
        parser.add_argument('--verbose', dest='verbose', action='store_true',
                            help='output more information during the files processing.')
        parser.add_argument('--shuffle', dest='shuffle', action='store_true',
                            help='randomize the unicode replacement process.')
        parser.add_argument('--font', dest='font', default='cour.ttf',
                            help='target font for look-alike detection (default is cour.ttf).')
        parser.add_argument('--font-size', metavar='SIZE', dest='font_size', type=int, default=20,
                            help='font size to use for look-alike detection (default is 20).')
        parser.add_argument('--russian-roulette', dest='russian_roulette', action='store_true',
                            help='one chance out of six to get shot.')
        # Parse the real command line. A hard-coded argument list here would
        # silently ignore the files and options given by the user.
        args = parser.parse_args()

        # Critically important warning
        print("Hi user!")
        print("Seem like you're about to make a terrible mistake or a stupid joke.")
        print("Do whatever you want to. I'm a program, not your mom. But be sure to understand this warning first.")
        print("This program *WILL DESTROY* the given file(s) by replacing all caracters with unicode look-alike.")
        print("Cancel the execution of this program *NOW* if you don't have a tested backup of all files!")
        print("The author of this program *IS NOT* responsible for any damage made with this program.")
        print("Use this program *AT YOUR OWN RISKS*!")
        print()
        if input('Type "I agree" to continue: ').strip('"').lower() != 'i agree':
            print('Execution cancelled')
            # sys.exit is always available, unlike the ``exit`` helper which
            # is injected by the ``site`` module.
            sys.exit(1)

        # Build the LUT
        verbose_print(args.verbose, 'Building look-alike map for font "{}" at size {} ...'.format(args.font, args.font_size))
        lut = build_lookalike_map(args.font, args.font_size)
        verbose_print(args.verbose, 'Look-alike map generated!')

        # Process each path
        verbose_print(args.verbose, 'Starting file(s) processing ...')
        for path in args.paths:
            verbose_print(args.verbose, 'Processing: {}'.format(path))

            # Trigger the routine only 1/6th of the times in russian roulette mode
            # (randrange(6) is non-zero, hence truthy, five times out of six)
            if args.russian_roulette and random.randrange(6):
                print('*click*')
                continue

            # Too late to cancel
            unicoder_file(path, lut,
                          verbose=args.verbose,
                          shuffle=args.shuffle)

        # End-of-script
        verbose_print(args.verbose, 'File(s) processing done.')