Last active
August 12, 2020 15:41
-
-
Save ivg/7c8f12d9e6ed9164c9c4fd7727529105 to your computer and use it in GitHub Desktop.
Lifting a toy bytecode using Core Theory
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| open Core_kernel | |
| open Bap_core_theory | |
| open Bap.Std | |
| open KB.Syntax | |
| include Self() | |
| let package = "bytoy" | |
| type name = string [@@deriving equal,sexp] | |
| type oper = Reg of int | Imm of int [@@deriving equal,sexp] | |
| type insn = name * int * oper [@@deriving equal,sexp] | |
| type code = insn array [@@deriving equal,sexp] | |
| type ctxt = {proc : int; pc: int} [@@deriving equal, sexp] | |
| type proc = {name : string; code : code} [@@deriving equal, sexp] | |
| type prog = proc array [@@deriving equal,sexp] | |
| let byte = Theory.Bitv.define 8 | |
| let word = Theory.Bitv.define 32 | |
| let bool = Theory.Bool.t | |
| let heap = Theory.Mem.define word byte (* global byte-addressable memory *) | |
| let vars = Theory.Mem.define word word (* local word addressable variables *) | |
| module Word = Bitvec.M32 | |
| module Byte = Bitvec.M8 | |
| let pp_insn ppf x = Sexp.pp_hum ppf (sexp_of_insn x) | |
| let r = Array.init 32 ~f:(fun i -> | |
| Theory.Var.define word (sprintf "R%02d" i)) | |
| let mem = Theory.Var.define heap "heap" | |
| let local = Theory.Var.define vars "locals" | |
| let fp = Theory.Var.define word "FP" | |
| let frame_size = 128 | |
| module Language(Core : Theory.Core) = struct | |
| open Core | |
| let reg i = var r.(i) | |
| let imm x = int word x | |
| let op : oper -> _ Theory.Bitv.t Theory.pure = function | |
| | Reg x -> reg x | |
| | Imm x -> imm (Word.int x) | |
| let loads x = signed word @@ load (var mem) x | |
| let loadb x = unsigned word @@ load (var mem) x | |
| let loadw x = loadw word b1 (var mem) x | |
| let saveb p x = store (var mem) p (low byte x) | |
| let savew p x = storew b1 (var mem) p x | |
| let get x = load (var local) (add (var fp) x) | |
| let put p x = store (var local) (add (var fp) p) x | |
| let case_binop name ~matches:ok no = match name with | |
| | "add" -> ok add | |
| | "sub" -> ok sub | |
| | "mul" -> ok mul | |
| | "div" -> ok div | |
| | _ -> no () | |
| let case_loadw name ~matches:ok no = match name with | |
| | "lds" -> ok loads | |
| | "ldb" -> ok loadb | |
| | "ldw" -> ok loadw | |
| | _ -> no () | |
| let case_savew name ~matches:ok no = match name with | |
| | "stb" -> ok saveb | |
| | "stw" -> ok savew | |
| | _ -> no () | |
| let pass = perform Theory.Effect.Sort.bot | |
| let skip = perform Theory.Effect.Sort.bot | |
| let block seq data ctrl = | |
| Theory.Label.for_addr (Word.int seq) >>= fun label -> | |
| blk label data ctrl | |
| let data seq data = block seq data skip | |
| let ctrl seq ctrl = block seq pass ctrl | |
| let const x = int word (Word.int x) | |
| let new_frame pc = seq | |
| (set fp (add (var fp) (const (frame_size+1)))) | |
| (set local (put (const ~-1) (const pc))) | |
| let del_frame = | |
| set fp (sub (var fp) (const (frame_size+1))) | |
| let jumps prog {proc; pc} name r x = match x with | |
| | Reg _ -> failwith "ill-formed program: non-const jump" | |
| | Imm dst -> | |
| match name with | |
| | "call" -> | |
| Theory.Label.for_name prog.(dst).name >>= fun dst -> | |
| block pc (new_frame pc) (goto dst) | |
| | "ret" -> | |
| block pc del_frame (jmp (get (const frame_size))) | |
| | "jmp" -> | |
| Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst -> | |
| ctrl pc @@ goto dst | |
| | "beq" -> | |
| Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst -> | |
| Theory.Label.for_addr (Word.int (pc+1)) >>= fun fall -> | |
| ctrl pc @@ branch (non_zero (reg r)) | |
| (goto dst) | |
| (goto fall) | |
| | opcode -> invalid_argf "unknown opcode %S" opcode () | |
| let insn : prog -> ctxt -> insn -> _ = fun prog {proc;pc} (name,d,s) -> | |
| case_binop name ~matches:(fun f -> | |
| data pc @@ set r.(d) (f (reg d) (op s))) @@ fun () -> | |
| case_loadw name ~matches:(fun load -> | |
| data pc @@ set r.(d) (load (op s))) @@ fun () -> | |
| case_savew name ~matches:(fun save -> | |
| data pc @@ set mem (save (reg d) (op s))) @@ fun () -> | |
| match name with | |
| | "get" -> | |
| data pc @@ set r.(d) (get (op s)) | |
| | "set" -> | |
| data pc @@ set local (put (reg d) (op s)) | |
| | _ -> jumps prog {proc; pc} name d s | |
| end | |
| let add_blks sub = function | |
| | [] -> sub | |
| | first :: _ as blks -> | |
| match Term.last blk_t sub with | |
| | Some last when Term.length jmp_t first = 0 -> | |
| let fall = Jmp.create_goto (Direct (Term.tid first)) in | |
| let last = Term.append jmp_t last fall in | |
| let sub = Term.update blk_t sub last in | |
| List.fold blks ~init:sub ~f:(Term.append blk_t) | |
| | _ -> List.fold blks ~init:sub ~f:(Term.append blk_t) | |
| let build_program lift source = | |
| let prog = Program.create () in | |
| Seq.of_array source |> | |
| KB.Seq.fold ~init:(0,prog) ~f:(fun (pos,prog) proc -> | |
| Theory.Label.for_name proc.name >>= fun tid -> | |
| let sub = Sub.create ~tid ~name:proc.name () in | |
| Seq.of_array proc.code |> | |
| KB.Seq.fold ~init:({proc=pos;pc=0},sub) ~f:(fun (ctxt,sub) insn -> | |
| lift source {proc=pos; pc=0} insn >>| fun blks -> | |
| let sub = add_blks sub blks in | |
| {ctxt with pc = ctxt.pc+1},sub) >>| fun (_,sub) -> | |
| (pos+1,Term.append sub_t prog sub)) >>| snd | |
| let load filename = | |
| Sexp.load_sexps filename |> | |
| Array.of_list_map ~f:(function | |
| | Sexp.List (Atom "defun" :: Atom name :: _ :: code) -> | |
| let code = code_of_sexp (Sexp.List code) in | |
| {name; code} | |
| | _ -> failwith "Parser error, expects (defun <name> () code)") | |
| let lifter = KB.Class.declare "bytoy-lifter" () | |
| let program_t = KB.Domain.optional "program_t" ~equal:Program.equal | |
| let program = KB.Class.property lifter "program" | |
| ~public:true | |
| ~package | |
| program_t | |
| let bytecode_domain = KB.Domain.optional "bytecode" | |
| ~equal:[%equal: prog*ctxt*insn] | |
| let bytecode = KB.Class.property Theory.Program.cls "bytecode" | |
| ~public:true | |
| ~package | |
| bytecode_domain | |
| let provide_semantics () = | |
| KB.promise Theory.Program.Semantics.slot @@ fun insn -> | |
| Theory.(instance>=>require) () >>= fun (module Core) -> | |
| let module Lift = Language(Core) in | |
| KB.collect bytecode insn >>= function | |
| | None -> KB.return Insn.empty | |
| | Some (prog,ctxt,code) -> Lift.insn prog ctxt code | |
| let lift prog = | |
| let collect_bir prog ctxt code = | |
| KB.Object.create Theory.Program.cls >>= fun insn -> | |
| KB.provide bytecode insn (Some (prog,ctxt,code)) >>= fun () -> | |
| KB.collect Theory.Program.Semantics.slot insn >>| fun sema -> | |
| KB.Value.get Term.slot sema in | |
| build_program collect_bir prog | |
| let run_lifter prog = | |
| let obj = | |
| KB.Object.create lifter >>= fun lifter -> | |
| lift prog >>= fun prog -> | |
| KB.provide program lifter (Some prog) >>| fun () -> | |
| lifter in | |
| match KB.run lifter obj (Toplevel.current ()) with | |
| | Error conflict -> | |
| error "ill-formed program: %a" KB.Conflict.pp conflict; | |
| invalid_arg "ill-formed program" | |
| | Ok (value,state) -> | |
| Toplevel.set state; | |
| match KB.Value.get program value with | |
| | None -> assert false | |
| | Some prog -> prog | |
| let () = Project.Input.register_loader "bytoy" @@ fun filename -> | |
| let empty = Memmap.empty in | |
| let source = load filename in | |
| provide_semantics (); | |
| let prog = run_lifter source in | |
| Project.Input.create `unknown filename ~code:empty ~data:empty | |
| ~finish:(fun proj -> Project.with_program proj prog) |
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
Building
Put this file into a separate folder and issue the following command:
Using
Using the following example program (put it into the example.bytoy file):
We can get lift and gets its graph with the following commands:
You can then view the generated graph either using
xdot example.dotor translate it to an image, e.g., withdot example.dot -Tpng -o example.png, the sample output is provided below: