Skip to content

Instantly share code, notes, and snippets.

@ivg
Last active August 12, 2020 15:41
  • Select an option

Select an option

Revisions

  1. ivg revised this gist Apr 20, 2020. 1 changed file with 92 additions and 24 deletions.
    116 changes: 92 additions & 24 deletions bytoy.ml
    Original file line number Diff line number Diff line change
    @@ -26,7 +26,7 @@ module Byte = Bitvec.M8
    let pp_insn ppf x = Sexp.pp_hum ppf (sexp_of_insn x)

    let r = Array.init 32 ~f:(fun i ->
    Theory.Var.define word (sprintf "R%02d" i))
    Theory.Var.define word (sprintf "R%d" i))

    let mem = Theory.Var.define heap "heap"
    let local = Theory.Var.define vars "locals"
    @@ -95,6 +95,8 @@ module Language(Core : Theory.Core) = struct
    match name with
    | "call" ->
    Theory.Label.for_name prog.(dst).name >>= fun dst ->
    KB.provide Theory.Label.is_subroutine dst (Some true) >>=
    fun () ->
    block pc (new_frame pc) (goto dst)
    | "ret" ->
    block pc del_frame (jmp (get (const frame_size)))
    @@ -103,10 +105,9 @@ module Language(Core : Theory.Core) = struct
    ctrl pc @@ goto dst
    | "beq" ->
    Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst ->
    Theory.Label.for_addr (Word.int (pc+1)) >>= fun fall ->
    ctrl pc @@ branch (non_zero (reg r))
    (goto dst)
    (goto fall)
    skip
    | opcode -> invalid_argf "unknown opcode %S" opcode ()

    let insn : prog -> ctxt -> insn -> _ = fun prog {proc;pc} (name,d,s) ->
    @@ -126,30 +127,97 @@ module Language(Core : Theory.Core) = struct

    end


    let add_blks sub = function
    | [] -> sub
    | first :: _ as blks ->
    match Term.last blk_t sub with
    | Some last when Term.length jmp_t first = 0 ->
    let fall = Jmp.create_goto (Direct (Term.tid first)) in
    let last = Term.append jmp_t last fall in
    let sub = Term.update blk_t sub last in
    List.fold blks ~init:sub ~f:(Term.append blk_t)
    | _ -> List.fold blks ~init:sub ~f:(Term.append blk_t)
    let def_only blk = Term.length jmp_t blk = 0

    let is_unconditional jmp = match Jmp.cond jmp with
    | Bil.Int _ -> true
    | _ -> false

    let is_last_jump_unconditional blk =
    match Term.last jmp_t blk with
    | None -> false
    | Some jmp -> is_unconditional jmp

    let fall_if_possible dst blk =
    if is_last_jump_unconditional blk
    then blk
    else
    Term.append jmp_t blk @@
    Jmp.reify ~dst ()

    let localize_if_call pc blk = match Term.last jmp_t blk with
    | None -> KB.return blk
    | Some jmp -> match Jmp.alt jmp with
    | None -> KB.return blk
    | Some alt ->
    Theory.Label.for_addr (Word.int (pc+1)) >>| fun dst ->
    Term.update jmp_t blk @@
    Jmp.reify ()
    ~tid:(Term.tid jmp)
    ?cnd:(Jmp.guard jmp)
    ~dst:(Jmp.resolved dst)
    ~alt

    (* concat two def-only blocks *)
    let append_def_only b1 b2 =
    let b = Blk.Builder.init ~same_tid:true ~copy_defs:true b1 in
    Term.enum def_t b2 |> Seq.iter ~f:(Blk.Builder.add_def b);
    Term.enum jmp_t b2 |> Seq.iter ~f:(Blk.Builder.add_jmp b);
    Blk.Builder.result b

    (* [append xs ys]
    pre: the first block of [xs] is the exit block;
    pre: the first block of [ys] is the entry block;
    pre: the last block of [ys] is the exit block;
    post: the first block of [append xs ys] is the new exit block. *)
    let append xs ys = match xs, ys with
    | [],xs | xs,[] -> xs
    | x :: xs, y :: ys when def_only x ->
    List.rev_append ys (append_def_only x y :: xs)
    | x::xs, y::_ ->
    let fall = Jmp.resolved @@ Term.tid y in
    let x = fall_if_possible fall x in
    List.rev_append ys (x::xs)

    let add_blks sub blks = List.fold blks ~init:sub ~f:(Term.append blk_t)

    let clone ~tid blk =
    let b = Blk.Builder.create ~tid () in
    Term.enum phi_t blk |> Seq.iter ~f:(Blk.Builder.add_phi b);
    Term.enum def_t blk |> Seq.iter ~f:(Blk.Builder.add_def b);
    Term.enum jmp_t blk |> Seq.iter ~f:(Blk.Builder.add_jmp b);
    Blk.Builder.result b

    let mark_entry_blk pc blks =
    Theory.Label.for_addr (Word.int pc) >>| fun tid ->
    match blks with
    | [] -> [Blk.create ~tid ()]
    | blk :: blks -> clone ~tid blk :: blks

    let update_call pc blks =
    match blks with
    | [] -> KB.return []
    | last :: blks ->
    localize_if_call pc last >>| fun last ->
    last :: blks

    let build_program lift source =
    let prog = Program.create () in
    let ctxt = {proc=0; pc=0} in
    Seq.of_array source |>
    KB.Seq.fold ~init:(0,prog) ~f:(fun (pos,prog) proc ->
    Theory.Label.for_name proc.name >>= fun tid ->
    let sub = Sub.create ~tid ~name:proc.name () in
    KB.Seq.fold ~init:(ctxt,prog) ~f:(fun (ctxt,prog) proc ->
    Seq.of_array proc.code |>
    KB.Seq.fold ~init:({proc=pos;pc=0},sub) ~f:(fun (ctxt,sub) insn ->
    lift source {proc=pos; pc=0} insn >>| fun blks ->
    let sub = add_blks sub blks in
    {ctxt with pc = ctxt.pc+1},sub) >>| fun (_,sub) ->
    (pos+1,Term.append sub_t prog sub)) >>| snd
    KB.Seq.fold ~init:(ctxt,[]) ~f:(fun (ctxt,blks) insn ->
    lift source ctxt insn >>=
    mark_entry_blk ctxt.pc >>= fun blks' ->
    update_call ctxt.pc (append blks blks') >>| fun blks ->
    {ctxt with pc = ctxt.pc+1},blks) >>= fun (ctxt,blks) ->
    Theory.Label.for_name proc.name >>| fun tid ->
    let sub = Sub.create ~tid ~name:proc.name () in
    let sub = add_blks sub (List.rev blks) in
    let ctxt = {ctxt with proc = ctxt.proc + 1 } in
    (ctxt,Term.append sub_t prog sub)) >>| snd


    let load filename =
    Sexp.load_sexps filename |>
    @@ -159,7 +227,6 @@ let load filename =
    {name; code}
    | _ -> failwith "Parser error, expects (defun <name> () code)")


    let lifter = KB.Class.declare "bytoy-lifter" ()
    let program_t = KB.Domain.optional "program_t" ~equal:Program.equal
    let program = KB.Class.property lifter "program"
    @@ -170,6 +237,7 @@ let program = KB.Class.property lifter "program"

    let bytecode_domain = KB.Domain.optional "bytecode"
    ~equal:[%equal: prog*ctxt*insn]
    ~inspect:(fun (_,_,insn) -> sexp_of_insn insn)

    let bytecode = KB.Class.property Theory.Program.cls "bytecode"
    ~public:true
    @@ -186,7 +254,7 @@ let provide_semantics () =

    let lift prog =
    let collect_bir prog ctxt code =
    KB.Object.create Theory.Program.cls >>= fun insn ->
    Theory.Label.for_addr (Word.int ctxt.pc) >>= fun insn ->
    KB.provide bytecode insn (Some (prog,ctxt,code)) >>= fun () ->
    KB.collect Theory.Program.Semantics.slot insn >>| fun sema ->
    KB.Value.get Term.slot sema in
  2. ivg created this gist Apr 20, 2020.
    217 changes: 217 additions & 0 deletions bytoy.ml
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,217 @@
    open Core_kernel
    open Bap_core_theory
    open Bap.Std
    open KB.Syntax
    include Self()

    let package = "bytoy"

    type name = string [@@deriving equal,sexp]
    type oper = Reg of int | Imm of int [@@deriving equal,sexp]
    type insn = name * int * oper [@@deriving equal,sexp]
    type code = insn array [@@deriving equal,sexp]
    type ctxt = {proc : int; pc: int} [@@deriving equal, sexp]
    type proc = {name : string; code : code} [@@deriving equal, sexp]
    type prog = proc array [@@deriving equal,sexp]

    let byte = Theory.Bitv.define 8
    let word = Theory.Bitv.define 32
    let bool = Theory.Bool.t
    let heap = Theory.Mem.define word byte (* global byte-addressable memory *)
    let vars = Theory.Mem.define word word (* local word addressable variables *)

    module Word = Bitvec.M32
    module Byte = Bitvec.M8

    let pp_insn ppf x = Sexp.pp_hum ppf (sexp_of_insn x)

    let r = Array.init 32 ~f:(fun i ->
    Theory.Var.define word (sprintf "R%02d" i))

    let mem = Theory.Var.define heap "heap"
    let local = Theory.Var.define vars "locals"
    let fp = Theory.Var.define word "FP"
    let frame_size = 128

    module Language(Core : Theory.Core) = struct
    open Core

    let reg i = var r.(i)
    let imm x = int word x

    let op : oper -> _ Theory.Bitv.t Theory.pure = function
    | Reg x -> reg x
    | Imm x -> imm (Word.int x)

    let loads x = signed word @@ load (var mem) x
    let loadb x = unsigned word @@ load (var mem) x
    let loadw x = loadw word b1 (var mem) x
    let saveb p x = store (var mem) p (low byte x)
    let savew p x = storew b1 (var mem) p x

    let get x = load (var local) (add (var fp) x)
    let put p x = store (var local) (add (var fp) p) x

    let case_binop name ~matches:ok no = match name with
    | "add" -> ok add
    | "sub" -> ok sub
    | "mul" -> ok mul
    | "div" -> ok div
    | _ -> no ()

    let case_loadw name ~matches:ok no = match name with
    | "lds" -> ok loads
    | "ldb" -> ok loadb
    | "ldw" -> ok loadw
    | _ -> no ()

    let case_savew name ~matches:ok no = match name with
    | "stb" -> ok saveb
    | "stw" -> ok savew
    | _ -> no ()

    let pass = perform Theory.Effect.Sort.bot
    let skip = perform Theory.Effect.Sort.bot

    let block seq data ctrl =
    Theory.Label.for_addr (Word.int seq) >>= fun label ->
    blk label data ctrl

    let data seq data = block seq data skip
    let ctrl seq ctrl = block seq pass ctrl

    let const x = int word (Word.int x)

    let new_frame pc = seq
    (set fp (add (var fp) (const (frame_size+1))))
    (set local (put (const ~-1) (const pc)))

    let del_frame =
    set fp (sub (var fp) (const (frame_size+1)))

    let jumps prog {proc; pc} name r x = match x with
    | Reg _ -> failwith "ill-formed program: non-const jump"
    | Imm dst ->
    match name with
    | "call" ->
    Theory.Label.for_name prog.(dst).name >>= fun dst ->
    block pc (new_frame pc) (goto dst)
    | "ret" ->
    block pc del_frame (jmp (get (const frame_size)))
    | "jmp" ->
    Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst ->
    ctrl pc @@ goto dst
    | "beq" ->
    Theory.Label.for_addr (Word.int (pc+dst)) >>= fun dst ->
    Theory.Label.for_addr (Word.int (pc+1)) >>= fun fall ->
    ctrl pc @@ branch (non_zero (reg r))
    (goto dst)
    (goto fall)
    | opcode -> invalid_argf "unknown opcode %S" opcode ()

    let insn : prog -> ctxt -> insn -> _ = fun prog {proc;pc} (name,d,s) ->
    case_binop name ~matches:(fun f ->
    data pc @@ set r.(d) (f (reg d) (op s))) @@ fun () ->
    case_loadw name ~matches:(fun load ->
    data pc @@ set r.(d) (load (op s))) @@ fun () ->
    case_savew name ~matches:(fun save ->
    data pc @@ set mem (save (reg d) (op s))) @@ fun () ->
    match name with
    | "get" ->
    data pc @@ set r.(d) (get (op s))
    | "set" ->
    data pc @@ set local (put (reg d) (op s))
    | _ -> jumps prog {proc; pc} name d s


    end


    let add_blks sub = function
    | [] -> sub
    | first :: _ as blks ->
    match Term.last blk_t sub with
    | Some last when Term.length jmp_t first = 0 ->
    let fall = Jmp.create_goto (Direct (Term.tid first)) in
    let last = Term.append jmp_t last fall in
    let sub = Term.update blk_t sub last in
    List.fold blks ~init:sub ~f:(Term.append blk_t)
    | _ -> List.fold blks ~init:sub ~f:(Term.append blk_t)

    let build_program lift source =
    let prog = Program.create () in
    Seq.of_array source |>
    KB.Seq.fold ~init:(0,prog) ~f:(fun (pos,prog) proc ->
    Theory.Label.for_name proc.name >>= fun tid ->
    let sub = Sub.create ~tid ~name:proc.name () in
    Seq.of_array proc.code |>
    KB.Seq.fold ~init:({proc=pos;pc=0},sub) ~f:(fun (ctxt,sub) insn ->
    lift source {proc=pos; pc=0} insn >>| fun blks ->
    let sub = add_blks sub blks in
    {ctxt with pc = ctxt.pc+1},sub) >>| fun (_,sub) ->
    (pos+1,Term.append sub_t prog sub)) >>| snd

    let load filename =
    Sexp.load_sexps filename |>
    Array.of_list_map ~f:(function
    | Sexp.List (Atom "defun" :: Atom name :: _ :: code) ->
    let code = code_of_sexp (Sexp.List code) in
    {name; code}
    | _ -> failwith "Parser error, expects (defun <name> () code)")


    let lifter = KB.Class.declare "bytoy-lifter" ()
    let program_t = KB.Domain.optional "program_t" ~equal:Program.equal
    let program = KB.Class.property lifter "program"
    ~public:true
    ~package
    program_t


    let bytecode_domain = KB.Domain.optional "bytecode"
    ~equal:[%equal: prog*ctxt*insn]

    let bytecode = KB.Class.property Theory.Program.cls "bytecode"
    ~public:true
    ~package
    bytecode_domain

    let provide_semantics () =
    KB.promise Theory.Program.Semantics.slot @@ fun insn ->
    Theory.(instance>=>require) () >>= fun (module Core) ->
    let module Lift = Language(Core) in
    KB.collect bytecode insn >>= function
    | None -> KB.return Insn.empty
    | Some (prog,ctxt,code) -> Lift.insn prog ctxt code

    let lift prog =
    let collect_bir prog ctxt code =
    KB.Object.create Theory.Program.cls >>= fun insn ->
    KB.provide bytecode insn (Some (prog,ctxt,code)) >>= fun () ->
    KB.collect Theory.Program.Semantics.slot insn >>| fun sema ->
    KB.Value.get Term.slot sema in
    build_program collect_bir prog

    let run_lifter prog =
    let obj =
    KB.Object.create lifter >>= fun lifter ->
    lift prog >>= fun prog ->
    KB.provide program lifter (Some prog) >>| fun () ->
    lifter in
    match KB.run lifter obj (Toplevel.current ()) with
    | Error conflict ->
    error "ill-formed program: %a" KB.Conflict.pp conflict;
    invalid_arg "ill-formed program"
    | Ok (value,state) ->
    Toplevel.set state;
    match KB.Value.get program value with
    | None -> assert false
    | Some prog -> prog

    let () = Project.Input.register_loader "bytoy" @@ fun filename ->
    let empty = Memmap.empty in
    let source = load filename in
    provide_semantics ();
    let prog = run_lifter source in
    Project.Input.create `unknown filename ~code:empty ~data:empty
    ~finish:(fun proj -> Project.with_program proj prog)