Created
May 30, 2015 14:48
-
-
Save fbwright/b9d7c3fcfe573b3c63e6 to your computer and use it in GitHub Desktop.
Simple assembler for a simple VM
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
from __future__ import print_function, division | |
import sys | |
from time import sleep | |
if sys.version_info.major < 3: | |
input = raw_input | |
#THIS! IS! A! FUCKING! MESS! | |
#What follows was cobbled together in ~1 hour of sleepy boredom*, | |
# and thus may be filled with hacks, not-well-thought-out names and | |
# other zany things. Blaargh. | |
#It fails on error (on any error), it's not readily extensible (if | |
# it can be extended at all) and all-around bad. | |
#I hate christmas. | |
#* Und then extended on the following day, still sleepy-bored... | |
# I would give a finger for a good night's sleep. | |
# | |
#The only good things that I can say about this code is that it | |
# works. Kinda, sorta, if you don't look at it too closely. Not | |
# on wednesdays. | |
#TODO: I need to fucking refactor the following... | |
def dByte(params, line_number):
    """Assemble the operands of a DBYTE directive into raw bytes.

    Each operand is parsed as hexadecimal when it starts with "0x" and as
    decimal otherwise.

    Returns a ``(bytes, label)`` pair.  ``label`` is None when every operand
    parsed as a number.  The first operand that is not a number is taken to
    be a label: one zero byte is emitted in its place, that operand is
    returned as ``label`` and the operands after it are not processed.

    A number outside 0..255 is reported on stdout and contributes no byte.
    """
    emitted = bytearray()
    for param in params:
        base = 16 if param.startswith("0x") else 10
        try:
            value = int(param, base)
        except ValueError:
            # Not a number, so treat it as a label reference.
            emitted.extend(bytearray(1))
            return emitted, param
        try:
            emitted.append(value)
        except ValueError:
            # bytearray.append rejects anything outside range(256).
            print("Error: Number out of bounds %s (%2X) at line %s." % (value, value, line_number))
    return emitted, None
def dWord(params, line_number):
    """Assemble the operands of a DWORD directive into 16-bit words.

    Each operand is parsed as hexadecimal when it starts with "0x" and as
    decimal otherwise, then stored high byte first.  Negative numbers are
    stored in two's complement.  A number outside -0x8000..0xFFFF is
    reported on stdout and truncated to its low 16 bits.

    Returns a ``(bytes, label)`` pair shaped like the one dByte returns.
    ``label`` is None when every operand parsed as a number.  The first
    operand that is not a number is taken to be a label: two zero bytes are
    emitted in its place, that operand is returned as ``label`` and the
    operands after it are not processed.

    NOTE(review): the caller is expected to overwrite the placeholder with
    the label's address - confirm its patch offset suits a directive that
    has no opcode byte in front of the placeholder.
    """
    emitted = bytearray()
    for param in params:
        try:
            value = int(param, 16 if param.startswith("0x") else 10)
        except ValueError:
            # Not a number, so treat it as a label reference.
            emitted.extend(bytearray(2))
            return emitted, param
        if not -0x8000 <= value <= 0xFFFF:
            print("Error: Number out of bounds %s (%2X) at line %s - truncated (%2X)." % (value, value, line_number, value & 0xFFFF))
        value &= 0xFFFF  # also maps negative numbers to two's complement
        emitted.append(value >> 8)
        emitted.append(value & 0xFF)
    return emitted, None
# Assembler directives: mnemonic -> handler, called by emit() as
# handler(params, line_number).
PseudoInstructions = dict(
    DBYTE=dByte,
    DWORD=dWord,
)
# Mnemonic -> opcode byte.  The binary form of each opcode is shown alongside.
Instructions = {
    "NOP": 0x00,    # 0b00000000
    "PUSH": 0x20,   # 0b00100000
    "DROP": 0x21,   # 0b00100001
    "DUP": 0x22,    # 0b00100010
    "SWAP": 0x23,   # 0b00100011
    "PICK": 0x24,   # 0b00100100
    "ADD": 0x40,    # 0b01000000
    "SUB": 0x41,    # 0b01000001
    "JUMP": 0x7F,   # 0b01111111
    "JREL": 0x70,   # 0b01110000
    "JZ": 0x60,     # 0b01100000
    "PRINT": 0xE0,  # 0b11100000
}
def no_parameters(func):
    """Decorate an ``op_<NAME>`` handler that takes no operands.

    The wrapper reports stray operands on stdout and then calls the handler
    anyway, returning whatever the handler returns.  The mnemonic in the
    message is the handler's name without its first three characters
    ("op_"); functools.wraps keeps that name intact when decorators are
    stacked.
    """
    import functools

    @functools.wraps(func)
    def func_wrapper(params, line_number):
        if len(params) > 0:
            print("Error: %s does not need any parameters - at line %s." % (func.__name__[3:], line_number))
        return func(params, line_number)
    return func_wrapper
def one_parameter(func):
    """Decorate an ``op_<NAME>`` handler that takes exactly one operand.

    The wrapper reports a wrong operand count on stdout and then calls the
    handler anyway, returning whatever the handler returns.  The mnemonic in
    the message is the handler's name without its first three characters
    ("op_"); functools.wraps keeps that name intact when decorators are
    stacked.
    """
    import functools

    @functools.wraps(func)
    def func_wrapper(params, line_number):
        if len(params) != 1:
            print("Error: %s requires exactly one parameter - at line %s." % (func.__name__[3:], line_number))
        return func(params, line_number)
    return func_wrapper
def word(func):
    """Decorate an ``op_<NAME>`` handler whose first operand must be a word.

    The wrapper runs the first operand through getWord purely as a check
    (the encoded bytes are discarded), reports a missing or non-numeric
    operand on stdout, and then calls the handler anyway, returning whatever
    the handler returns.

    NOTE(review): label operands also fail numeric parsing and are reported
    here - confirm that is intended before wiring Instr_Funct into emit().
    """
    import functools

    @functools.wraps(func)
    def func_wrapper(params, line_number):
        try:
            getWord(params[0], line_number)
        except (ValueError, IndexError):
            # ValueError: operand is not a number; IndexError: no operand.
            print("Error: %s expects a word parameter - at line %s." % (func.__name__[3:], line_number))
        return func(params, line_number)
    return func_wrapper
def newbase(op):
    """Return a new one-byte bytearray holding the opcode for mnemonic *op*.

    Raises KeyError when *op* is not a key of Instructions.
    """
    return bytearray([Instructions[op]])
# Per-mnemonic encoders.  Each takes (params, line_number); the implemented
# ones return (newbase(<mnemonic>), None), the others are unimplemented stubs.

@no_parameters
def op_NOP(params, line_number):
    """Encode NOP as its bare opcode."""
    return newbase("NOP"), None


@one_parameter
@word
def op_PUSH(params, line_number):
    """Stub: PUSH encoding is not implemented here."""
    return None


@no_parameters
def op_DROP(params, line_number):
    """Encode DROP as its bare opcode."""
    return newbase("DROP"), None


@no_parameters
def op_DUP(params, line_number):
    """Encode DUP as its bare opcode."""
    return newbase("DUP"), None


@no_parameters
def op_SWAP(params, line_number):
    """Encode SWAP as its bare opcode."""
    return newbase("SWAP"), None


def op_PICK(params, line_number):
    """Stub: PICK encoding is not implemented here."""
    return None


@no_parameters
def op_ADD(params, line_number):
    """Encode ADD as its bare opcode."""
    return newbase("ADD"), None


@no_parameters
def op_SUB(params, line_number):
    """Encode SUB as its bare opcode."""
    return newbase("SUB"), None


def op_JUMP(params, line_number):
    """Stub: JUMP encoding is not implemented here."""
    return None


def op_JZ(params, line_number):
    """Stub: JZ encoding is not implemented here."""
    return None


def op_JREL(params, line_number):
    """Stub: JREL encoding is not implemented here."""
    return None


@no_parameters
def op_PRINT(params, line_number):
    """Encode PRINT as its bare opcode."""
    return newbase("PRINT"), None
# Publish every opcode as a module-level constant (NOP, PUSH, ...) and map
# each mnemonic to its op_<MNEMONIC> handler.  Plain globals() access does
# what the previous exec() of formatted source strings did.
Instr_Funct = {}
for instr, opcode in Instructions.items():
    globals()[instr] = opcode
    Instr_Funct[instr] = globals()["op_" + instr]
# Opcodes followed by a one-byte operand.
ParamByte = (PICK, )
# Opcodes followed by a two-byte operand, high byte first.  JREL belongs here
# because execute() consumes a word after it.
ParamWord = (PUSH, JUMP, JREL, JZ)
#What! Follows! Is! Quite! Silly! WE! HAAAVE!! EXEEEEC!!! | |
#Redundancy is bad, mkay? And yes, yes, exec is bad and all that jazz... | |
# Don't do this at home, children! | |
#PUSH, DROP, DUP, ADD, SUB, PRINT, JUMP, JZ = 0b1, 0b10, 0b11, 0b1000, 0b1001, 0b1111, 0b11111, 0b10000 | |
def execute(bytecode, debug=0):
    """Interpret *bytecode* on a simple stack machine.

    bytecode -- indexable sequence of byte values (e.g. a bytearray).
    debug    -- when true, print the program counter, the current opcode
                with the two bytes after it and the stack before every
                instruction, pausing 0.2 s per step.

    Nothing is returned; PRINT writes to stdout.  Execution stops as soon
    as the program counter leaves the bytecode.  Bytes that match no known
    opcode are skipped.  Reading from an empty stack raises IndexError.
    """
    stack = []
    pc = 0
    size = len(bytecode)
    # The lower bound stops a backwards jump past the start from wrapping
    # around through negative indexing.
    while 0 <= pc < size:
        byte = bytecode[pc]
        # The two following bytes are read ahead for every instruction;
        # positions past the end read as 0.
        word_h = bytecode[pc + 1] if pc + 1 < size else 0
        word_l = bytecode[pc + 2] if pc + 2 < size else 0
        # Unsigned view, used for absolute jump targets.
        address = (word_h << 8) | word_l
        # Signed view: 16-bit two's complement, as written by the assembler
        # (negative n is stored as 0x10000 + n).
        word = address - 0x10000 if address >= 0x8000 else address
        byte_ = word_h
        if debug:
            print("PC: %s\t%02X %02X %02X\t[%s] %s" % (pc, byte, word_h, word_l, len(stack), " ".join(str(i) for i in stack)))
            sleep(0.2)
        pc += 1
        if byte == NOP:
            pass
        elif byte == PUSH:
            stack.append(word)
            pc += 2
        elif byte == DROP:
            stack.pop()
        elif byte == DUP:
            stack.append(stack[-1])
        elif byte == SWAP:
            a, b = stack.pop(), stack.pop()
            stack.append(a)
            stack.append(b)
        elif byte == PICK:
            # NOTE(review): an operand of 0 reads stack[0], the bottom of
            # the stack - confirm that is wanted.
            stack.append(stack[-byte_])
            pc += 1  # skip the one-byte operand
        elif byte == ADD:
            stack.append(stack.pop() + stack.pop())
        elif byte == SUB:
            # second-from-top minus top
            stack.append(-stack.pop() + stack.pop())
        elif byte == PRINT:
            print(stack.pop())
        elif byte == JUMP:
            pc = address
        elif byte == JREL:
            # Relative to the instruction that follows the operand.
            pc += 2
            pc += word
        elif byte == JZ:
            # NOTE(review): despite the name this jumps when the top of the
            # stack is NOT zero, and it leaves the stack untouched.  Kept
            # as is; the demo program's "DUP / JZ loop" only loops with
            # this behaviour.
            if stack[-1] != 0:
                pc = address
            else:
                pc += 2
def drop_comments(line):
    """Return the part of *line* before the first '#' (all of it if none)."""
    code, _, _ = line.partition("#")
    return code
def tokenize(program):
    """Split source lines into ``[label, [mnemonic, operand, ...]]`` tokens.

    program -- iterable of source lines.

    Returns ``(tokens, labels)``.  ``tokens`` has one entry per line:
    ``label`` is the text before the first ':' with surrounding whitespace
    removed ("" when the line has no ':'), followed by a list holding the
    mnemonic and its comma-separated operands, each stripped (an empty list
    for a blank or label-only line).  ``labels`` maps every non-empty label
    to -1 as a "not placed yet" marker.

    Anything from '#' to the end of a line is ignored.
    """
    labels = {}
    tokens = []
    for line in program:
        code = drop_comments(line)
        # Only the first ':' separates a label, so later text is kept.
        label, colon, rest = code.partition(":")
        if colon:
            label = label.strip()
            if label:
                labels[label] = -1
        else:
            label, rest = "", code
        # Positional maxsplit: the keyword form is rejected by Python 2.
        fields = rest.split(None, 1)
        if len(fields) > 1:
            fields[1:] = [operand.strip() for operand in fields[1].split(",")]
        tokens.append([label, fields])
    return tokens, labels
def getWord(string, line_number):
    """Encode a numeric literal as a 16-bit word, high byte first.

    string      -- decimal literal, or hexadecimal when it starts with "0x".
    line_number -- only used in the out-of-bounds message.

    Returns a two-byte bytearray.  Negative numbers are stored as
    ``0x10000 + value``.  A number that still does not fit in 16 bits is
    reported on stdout and truncated to its low 16 bits.

    Raises ValueError when *string* is not a number; callers rely on that
    to recognise labels.
    """
    if string.startswith("0x"):
        value = int(string, 16)
    else:
        value = int(string)
    if value < 0:
        value = 0x10000 + value
    word_h = value >> 8
    word_l = value & 0xFF
    # word_h is negative for numbers below -0x10000.  Those must be
    # truncated too: otherwise bytearray.append raises a ValueError that
    # callers mistake for "this is a label".
    if word_h > 0xFF or word_h < 0:
        print("Error: Number out of bounds %s (%2X %2X) at line %s - truncated (%2X %2X)." % (value, word_h, word_l, line_number, word_h & 0xFF, word_l))
        word_h &= 0xFF
    return bytearray([word_h, word_l])
def getByte(string, line_number):
    """Encode a numeric literal as a single byte.

    string      -- decimal literal, or hexadecimal when it starts with "0x".
    line_number -- only used in the out-of-bounds message.

    Returns a one-byte bytearray.  Negative numbers are stored as
    ``0x100 + value``; a number that still falls outside 0..0xFF is
    reported on stdout and truncated to its low 8 bits.

    Raises ValueError when *string* is not a number.
    """
    base = 16 if string.startswith("0x") else 10
    value = int(string, base)
    if value < 0:
        value += 0x100
    if not 0x00 <= value <= 0xFF:
        print("Error: Byte out of bounds %s (%2X) at line %s - truncated (%2X)." % (value, value, line_number, value & 0xFF))
        value &= 0xFF
    return bytearray([value])
def emit(instr, params, line_number):
    """Encode one instruction or directive.

    instr       -- mnemonic, a key of PseudoInstructions or Instructions.
    params      -- list of operand strings.
    line_number -- used in error messages.

    Returns ``(bytes, label)``.  Directives are delegated to their handler.
    For a real instruction the opcode comes first.  Opcodes in ParamWord
    are followed by their operands as 16-bit words; the first operand that
    is not a number is taken to be a label, gets a two-byte zero
    placeholder and is returned as ``label`` (later operands are not
    processed).  Opcodes in ParamByte are followed by their operands as
    single bytes, matching what disassemble() expects; a non-numeric
    operand there is reported and encoded as 0.  Operands of any other
    instruction are ignored.

    An unknown mnemonic is reported on stdout and produces no bytes.
    """
    if instr in PseudoInstructions:
        return PseudoInstructions[instr](params, line_number)
    byte = bytearray()
    if instr not in Instructions:
        print("Error: Unknown instruction '%s' at line %s." % (instr, line_number))
        return byte, None
    opcode = Instructions[instr]
    byte.append(opcode)
    if opcode in ParamWord:
        for param in params:
            try:
                byte.extend(getWord(param, line_number))
            except ValueError:
                # Not a number: leave room for the label's address.
                byte.extend(bytearray(2))
                return byte, param
    elif opcode in ParamByte:
        for param in params:
            try:
                byte.extend(getByte(param, line_number))
            except ValueError:
                # A label's address is a word and cannot go in a byte slot.
                print("Error: %s expects a numeric byte parameter, got '%s' - at line %s." % (instr, param, line_number))
                byte.extend(bytearray(1))
    return byte, None
def emit_bytecode(program, labels):
    """Assemble tokenized lines into bytecode, resolving labels.

    program -- list of ``[label, [mnemonic, operand, ...]]`` tokens.
    labels  -- dict of label names; updated in place with each label's
               byte offset.

    Returns the assembled bytearray.  Problems (unknown label, directive
    without an implementation, label that does not fit) are reported on
    stdout and assembly continues.  Line numbers in messages are 0-based
    positions in *program*.
    """
    pending = []  # (label, offset of placeholder, placeholder width, line)
    bytecode = bytearray()
    index = 0
    # First pass - record label offsets and emit the instruction bytes.
    for line, token in enumerate(program):
        label = token[0]
        if label:
            labels[label] = index
        if len(token[1]) == 0:
            continue
        instr, params = token[1][0], token[1][1:]
        emitted = emit(instr, params, line)
        if emitted is None:
            # A handler that is still a stub returns nothing.
            print("Error: '%s' is not implemented - at line %s." % (instr, line))
            continue
        byte, lbl = emitted
        if lbl:
            # The placeholder is always the last thing emitted for a line.
            # DBYTE reserves one byte for it, everything else two.
            width = 1 if instr == "DBYTE" else 2
            pending.append((lbl, index + len(byte) - width, width, line))
        bytecode.extend(byte)
        index += len(byte)
    # Second pass - write label addresses over the placeholders.
    for label, offset, width, line in pending:
        try:
            address = labels[label]
        except KeyError:
            print("Error: Unknown label '%s' at line %s." % (label, line))
            continue
        if width == 1:
            if address > 0xFF:
                print("Error: Label '%s' (address %s) does not fit in a byte at line %s." % (label, address, line))
            bytecode[offset] = address & 0xFF
        else:
            bytecode[offset] = address >> 8
            bytecode[offset + 1] = address & 0xFF
    return bytecode
def disassemble(bytecode):
    """Split *bytecode* into one entry per instruction.

    This does not decode anything.  An opcode listed in ParamWord yields a
    slice of up to three bytes (opcode plus word operand), one listed in
    ParamByte a slice of up to two bytes, and any other byte is returned as
    a bare int.
    """
    pieces = []
    position = 0
    total = len(bytecode)
    while position < total:
        opcode = bytecode[position]
        if opcode in ParamWord:
            width = 3
        elif opcode in ParamByte:
            width = 2
        else:
            width = 1
        if width == 1:
            pieces.append(opcode)
        else:
            pieces.append(bytecode[position:position + width])
        position += width
    return pieces
def load():
    """Read program lines from the user until a line that is exactly "@".

    Returns the entered lines as a list.  The "@" terminator is not part of
    the result, so it is never handed to the assembler as an instruction.
    End of input ends the program as well.
    """
    ret = []
    print("Write the program - '@' to stop:")
    while True:
        try:
            cmd = input(".. ")
        except EOFError:
            break
        if cmd == "@":
            break
        ret.append(cmd)
    return ret
def chunks(l, n):
    """Generate consecutive slices of *l*, each *n* items long.

    The last slice is shorter when len(l) is not a multiple of *n*.
    """
    for start in range(0, len(l), n):
        stop = start + n
        yield l[start:stop]
def hex_bytearray(b):
    """Format *b* as two-digit upper-case hex, space separated.

    *b* is either a single int or an iterable of ints.
    """
    values = [b] if type(b) is int else b
    return " ".join("%02X" % value for value in values)
def hexdump(bytecode):
    """Print *bytecode* as a hex dump, 16 bytes per row.

    Every row shows the offset of its first byte, the bytes in hex and the
    same bytes as text.  In the text column, control characters (below
    0x20, 0x7F, and 0x80-0x9F) are shown as '.' so they cannot disturb the
    terminal or the column layout.
    """
    BYTES = 16
    print("\nHEXDUMP - Dumped {0} bytes.".format(len(bytecode)))
    for index, chunk in enumerate(chunks(bytecode, BYTES)):
        offset = index * BYTES
        hex_column = "".join("%02X " % byte for byte in chunk)
        text_column = "".join(
            chr(byte) if 0x20 <= byte < 0x7F or byte > 0x9F else "."
            for byte in chunk)
        print("{0:0>8X} {1:{2}} {3:{4}}".format(
            offset, hex_column, 3 * BYTES, text_column, BYTES))
    print()
# for byte in range(256): | |
# if byte < 0x80 or byte > 0x9F: print(chr(byte), end="") | |
# else: print(".", end="") | |
if __name__ == "__main__":
    # Demo: assemble a hard-coded program, print a listing and a hex dump,
    # then run the result.  The program contains several deliberate mistakes
    # (stray operand, unknown labels, out-of-range numbers) to exercise the
    # error reporting.  Use `program = load()` to type one in instead.
    program = """
NOP 33
PUSH baa, ba
init:
PUSH 8 #This is a quite long comment that should be cut
PUSH 4
ADD
DUP
loop:
PUSH 1
SUB
DUP
JZ loop
JUMP end
DROP #All
PUSH 0xFFFF #this
PRINT #is
DROP #skipped
JUMP iniv #!!!1!
PUSH 3789652
PUSH 127
end:
PUSH -1111
PUSH 0xFFFF
PUSH 129
""".split("\n")
    tokens, labels = tokenize(program)
    bytecode = emit_bytecode(tokens, labels)
    split_bytecode, s_i = disassemble(bytecode), 0
    print("{0:6} {1:32} {2:30}{3}".format("LINE", "ORIGINAL", "TOKENIZED", "ASSEMBLED"))
    for line_number, (line, token) in enumerate(zip(program, tokens)):
        print("{0:>6} {1:<32} {2:<30}".format(line_number, line[:32], "%s" % token), end="")
        # One disassembled entry is shown per line that has an instruction.
        # The bounds check keeps the listing alive if the two get out of
        # step.
        if len(token[1]) and s_i < len(split_bytecode):
            print(hex_bytearray(split_bytecode[s_i]), end="")
            s_i += 1
        print()
    hexdump(bytecode)
    print("STARTING EXECUTION")
    execute(bytecode)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment