Last active
August 12, 2020 15:41
Revisions
-
ivg revised this gist
Apr 20, 2020 . 1 changed file with 92 additions and 24 deletions.There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -26,7 +26,7 @@ module Byte = Bitvec.M8 let pp_insn ppf x = Sexp.pp_hum ppf (sexp_of_insn x) let r = Array.init 32 ~f:(fun i -> Theory.Var.define word (sprintf "R%d" i)) let mem = Theory.Var.define heap "heap" let local = Theory.Var.define vars "locals" @@ -95,6 +95,8 @@ module Language(Core : Theory.Core) = struct match name with | "call" -> Theory.Label.for_name prog.(dst).name >>= fun dst -> KB.provide Theory.Label.is_subroutine dst (Some true) >>= fun () -> block pc (new_frame pc) (goto dst) | "ret" -> block pc del_frame (jmp (get (const frame_size))) @@ -103,10 +105,9 @@ module Language(Core : Theory.Core) = struct ctrl pc @@ goto dst | "beq" -> Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst -> ctrl pc @@ branch (non_zero (reg r)) (goto dst) skip | opcode -> invalid_argf "unknown opcode %S" opcode () let insn : prog -> ctxt -> insn -> _ = fun prog {proc;pc} (name,d,s) -> @@ -126,30 +127,97 @@ module Language(Core : Theory.Core) = struct end let def_only blk = Term.length jmp_t blk = 0 let is_unconditional jmp = match Jmp.cond jmp with | Bil.Int _ -> true | _ -> false let is_last_jump_unconditional blk = match Term.last jmp_t blk with | None -> false | Some jmp -> is_unconditional jmp let fall_if_possible dst blk = if is_last_jump_unconditional blk then blk else Term.append jmp_t blk @@ Jmp.reify ~dst () let localize_if_call pc blk = match Term.last jmp_t blk with | None -> KB.return blk | Some jmp -> match Jmp.alt jmp with | None -> KB.return blk | Some alt -> Theory.Label.for_addr (Word.int (pc+1)) >>| fun dst -> Term.update jmp_t blk @@ Jmp.reify () ~tid:(Term.tid jmp) ?cnd:(Jmp.guard jmp) ~dst:(Jmp.resolved dst) ~alt (* concat two def-only blocks *) let append_def_only b1 b2 = let b = Blk.Builder.init ~same_tid:true ~copy_defs:true b1 in Term.enum def_t b2 |> Seq.iter ~f:(Blk.Builder.add_def b); Term.enum jmp_t b2 |> Seq.iter ~f:(Blk.Builder.add_jmp b); Blk.Builder.result b (* [append xs ys] pre: the first block of [xs] is the exit block; pre: the first block of [ys] is the entry block; pre: the last block of [ys] is the exit block; post: the first block of [append xs ys] is the new exit block. *) let append xs ys = match xs, ys with | [],xs | xs,[] -> xs | x :: xs, y :: ys when def_only x -> List.rev_append ys (append_def_only x y :: xs) | x::xs, y::_ -> let fall = Jmp.resolved @@ Term.tid y in let x = fall_if_possible fall x in List.rev_append ys (x::xs) let add_blks sub blks = List.fold blks ~init:sub ~f:(Term.append blk_t) let clone ~tid blk = let b = Blk.Builder.create ~tid () in Term.enum phi_t blk |> Seq.iter ~f:(Blk.Builder.add_phi b); Term.enum def_t blk |> Seq.iter ~f:(Blk.Builder.add_def b); Term.enum jmp_t blk |> Seq.iter ~f:(Blk.Builder.add_jmp b); Blk.Builder.result b let mark_entry_blk pc blks = Theory.Label.for_addr (Word.int pc) >>| fun tid -> match blks with | [] -> [Blk.create ~tid ()] | blk :: blks -> clone ~tid blk :: blks let update_call pc blks = match blks with | [] -> KB.return [] | last :: blks -> localize_if_call pc last >>| fun last -> last :: blks let build_program lift source = let prog = Program.create () in let ctxt = {proc=0; pc=0} in Seq.of_array source |> KB.Seq.fold ~init:(ctxt,prog) ~f:(fun (ctxt,prog) proc -> Seq.of_array proc.code |> KB.Seq.fold ~init:(ctxt,[]) ~f:(fun (ctxt,blks) insn -> lift source ctxt insn >>= mark_entry_blk ctxt.pc >>= fun blks' -> update_call ctxt.pc (append blks blks') >>| fun blks -> {ctxt with pc = ctxt.pc+1},blks) >>= fun (ctxt,blks) -> Theory.Label.for_name proc.name >>| fun tid -> let sub = Sub.create ~tid ~name:proc.name () in let sub = add_blks sub (List.rev blks) in let ctxt = {ctxt with proc = ctxt.proc + 1 } in (ctxt,Term.append sub_t prog sub)) >>| snd let load filename = Sexp.load_sexps filename |> @@ -159,7 +227,6 @@ let load filename = {name; code} | _ -> failwith "Parser error, expects (defun <name> () code)") let lifter = KB.Class.declare "bytoy-lifter" () let program_t = KB.Domain.optional "program_t" ~equal:Program.equal let program = KB.Class.property lifter "program" @@ -170,6 +237,7 @@ let program = KB.Class.property lifter "program" let bytecode_domain = KB.Domain.optional "bytecode" ~equal:[%equal: prog*ctxt*insn] ~inspect:(fun (_,_,insn) -> sexp_of_insn insn) let bytecode = KB.Class.property Theory.Program.cls "bytecode" ~public:true @@ -186,7 +254,7 @@ let provide_semantics () = let lift prog = let collect_bir prog ctxt code = Theory.Label.for_addr (Word.int ctxt.pc) >>= fun insn -> KB.provide bytecode insn (Some (prog,ctxt,code)) >>= fun () -> KB.collect Theory.Program.Semantics.slot insn >>| fun sema -> KB.Value.get Term.slot sema in -
ivg created this gist
Apr 20, 2020 .There are no files selected for viewing
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters. Learn more about bidirectional Unicode charactersOriginal file line number Diff line number Diff line change @@ -0,0 +1,217 @@ open Core_kernel open Bap_core_theory open Bap.Std open KB.Syntax include Self() let package = "bytoy" type name = string [@@deriving equal,sexp] type oper = Reg of int | Imm of int [@@deriving equal,sexp] type insn = name * int * oper [@@deriving equal,sexp] type code = insn array [@@deriving equal,sexp] type ctxt = {proc : int; pc: int} [@@deriving equal, sexp] type proc = {name : string; code : code} [@@deriving equal, sexp] type prog = proc array [@@deriving equal,sexp] let byte = Theory.Bitv.define 8 let word = Theory.Bitv.define 32 let bool = Theory.Bool.t let heap = Theory.Mem.define word byte (* global byte-addressable memory *) let vars = Theory.Mem.define word word (* local word addressable variables *) module Word = Bitvec.M32 module Byte = Bitvec.M8 let pp_insn ppf x = Sexp.pp_hum ppf (sexp_of_insn x) let r = Array.init 32 ~f:(fun i -> Theory.Var.define word (sprintf "R%02d" i)) let mem = Theory.Var.define heap "heap" let local = Theory.Var.define vars "locals" let fp = Theory.Var.define word "FP" let frame_size = 128 module Language(Core : Theory.Core) = struct open Core let reg i = var r.(i) let imm x = int word x let op : oper -> _ Theory.Bitv.t Theory.pure = function | Reg x -> reg x | Imm x -> imm (Word.int x) let loads x = signed word @@ load (var mem) x let loadb x = unsigned word @@ load (var mem) x let loadw x = loadw word b1 (var mem) x let saveb p x = store (var mem) p (low byte x) let savew p x = storew b1 (var mem) p x let get x = load (var local) (add (var fp) x) let put p x = store (var local) (add (var fp) p) x let case_binop name ~matches:ok no = match name with | "add" -> ok add | "sub" -> ok sub | "mul" -> ok mul | "div" -> ok div | _ -> no () let case_loadw name ~matches:ok no = match name with | "lds" -> ok loads | "ldb" -> ok loadb | "ldw" -> ok loadw | _ -> no () let case_savew name ~matches:ok no = match name with | "stb" -> ok saveb | "stw" -> ok savew | _ -> no () let pass = perform Theory.Effect.Sort.bot let skip = perform Theory.Effect.Sort.bot let block seq data ctrl = Theory.Label.for_addr (Word.int seq) >>= fun label -> blk label data ctrl let data seq data = block seq data skip let ctrl seq ctrl = block seq pass ctrl let const x = int word (Word.int x) let new_frame pc = seq (set fp (add (var fp) (const (frame_size+1)))) (set local (put (const ~-1) (const pc))) let del_frame = set fp (sub (var fp) (const (frame_size+1))) let jumps prog {proc; pc} name r x = match x with | Reg _ -> failwith "ill-formed program: non-const jump" | Imm dst -> match name with | "call" -> Theory.Label.for_name prog.(dst).name >>= fun dst -> block pc (new_frame pc) (goto dst) | "ret" -> block pc del_frame (jmp (get (const frame_size))) | "jmp" -> Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst -> ctrl pc @@ goto dst | "beq" -> Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst -> Theory.Label.for_addr (Word.int (pc+1)) >>= fun fall -> ctrl pc @@ branch (non_zero (reg r)) (goto dst) (goto fall) | opcode -> invalid_argf "unknown opcode %S" opcode () let insn : prog -> ctxt -> insn -> _ = fun prog {proc;pc} (name,d,s) -> case_binop name ~matches:(fun f -> data pc @@ set r.(d) (f (reg d) (op s))) @@ fun () -> case_loadw name ~matches:(fun load -> data pc @@ set r.(d) (load (op s))) @@ fun () -> case_savew name ~matches:(fun save -> data pc @@ set mem (save (reg d) (op s))) @@ fun () -> match name with | "get" -> data pc @@ set r.(d) (get (op s)) | "set" -> data pc @@ set local (put (reg d) (op s)) | _ -> jumps prog {proc; pc} name d s end let add_blks sub = function | [] -> sub | first :: _ as blks -> match Term.last blk_t sub with | Some last when Term.length jmp_t first = 0 -> let fall = Jmp.create_goto (Direct (Term.tid first)) in let last = Term.append jmp_t last fall in let sub = Term.update blk_t sub last in List.fold blks ~init:sub ~f:(Term.append blk_t) | _ -> List.fold blks ~init:sub ~f:(Term.append blk_t) let build_program lift source = let prog = Program.create () in Seq.of_array source |> KB.Seq.fold ~init:(0,prog) ~f:(fun (pos,prog) proc -> Theory.Label.for_name proc.name >>= fun tid -> let sub = Sub.create ~tid ~name:proc.name () in Seq.of_array proc.code |> KB.Seq.fold ~init:({proc=pos;pc=0},sub) ~f:(fun (ctxt,sub) insn -> lift source {proc=pos; pc=0} insn >>| fun blks -> let sub = add_blks sub blks in {ctxt with pc = ctxt.pc+1},sub) >>| fun (_,sub) -> (pos+1,Term.append sub_t prog sub)) >>| snd let load filename = Sexp.load_sexps filename |> Array.of_list_map ~f:(function | Sexp.List (Atom "defun" :: Atom name :: _ :: code) -> let code = code_of_sexp (Sexp.List code) in {name; code} | _ -> failwith "Parser error, expects (defun <name> () code)") let lifter = KB.Class.declare "bytoy-lifter" () let program_t = KB.Domain.optional "program_t" ~equal:Program.equal let program = KB.Class.property lifter "program" ~public:true ~package program_t let bytecode_domain = KB.Domain.optional "bytecode" ~equal:[%equal: prog*ctxt*insn] let bytecode = KB.Class.property Theory.Program.cls "bytecode" ~public:true ~package bytecode_domain let provide_semantics () = KB.promise Theory.Program.Semantics.slot @@ fun insn -> Theory.(instance>=>require) () >>= fun (module Core) -> let module Lift = Language(Core) in KB.collect bytecode insn >>= function | None -> KB.return Insn.empty | Some (prog,ctxt,code) -> Lift.insn prog ctxt code let lift prog = let collect_bir prog ctxt code = KB.Object.create Theory.Program.cls >>= fun insn -> KB.provide bytecode insn (Some (prog,ctxt,code)) >>= fun () -> KB.collect Theory.Program.Semantics.slot insn >>| fun sema -> KB.Value.get Term.slot sema in build_program collect_bir prog let run_lifter prog = let obj = KB.Object.create lifter >>= fun lifter -> lift prog >>= fun prog -> KB.provide program lifter (Some prog) >>| fun () -> lifter in match KB.run lifter obj (Toplevel.current ()) with | Error conflict -> error "ill-formed program: %a" KB.Conflict.pp conflict; invalid_arg "ill-formed program" | Ok (value,state) -> Toplevel.set state; match KB.Value.get program value with | None -> assert false | Some prog -> prog let () = Project.Input.register_loader "bytoy" @@ fun filename -> let empty = Memmap.empty in let source = load filename in provide_semantics (); let prog = run_lifter source in Project.Input.create `unknown filename ~code:empty ~data:empty ~finish:(fun proj -> Project.with_program proj prog)