"""Import C/C++ declarations parsed by libclang into IDA's type library.

The module walks a clang translation unit (see ``parse_file`` /
``parse_ast``) and, for every cursor kind that has a registered handler,
creates the matching IDA type (enum, struct/union, typedef, function
prototype) in the local type library ``idati``.
"""
import re
from functools import reduce
from clang.cindex import (
    Index,
    CursorKind,
    TypeKind,
    TranslationUnit,
    BaseEnumeration,
    conf,
)

try:
    import idaapi
    from ida_bytes import *
    from ida_typeinf import *
    from idc import *
except ImportError:
    # Best effort only: the module-level code below still needs ``idaapi``
    # and will fail with a NameError when run outside of IDA.
    pass


class CallingConv(BaseEnumeration):
    """Calling-convention ids returned by clang_getFunctionTypeCallingConv."""
    _kinds = []
    _name_map = None


CallingConv.Default = CallingConv(0)
CallingConv.C = CallingConv(1)
CallingConv.X86StdCall = CallingConv(2)
CallingConv.X86FastCall = CallingConv(3)
CallingConv.X86ThisCall = CallingConv(4)
CallingConv.X86Pascal = CallingConv(5)
CallingConv.AAPCS = CallingConv(6)
CallingConv.AAPCS_VFP = CallingConv(7)
CallingConv.X86RegCall = CallingConv(8)
CallingConv.IntelOclBicc = CallingConv(9)
CallingConv.Win64 = CallingConv(10)
CallingConv.X86_64Win64 = CallingConv.Win64
CallingConv.X86_64SysV = CallingConv(11)
CallingConv.X86VectorCall = CallingConv(12)
CallingConv.Swift = CallingConv(13)
CallingConv.PreserveMost = CallingConv(14)
CallingConv.PreserveAll = CallingConv(15)
CallingConv.AArch64VectorCall = CallingConv(16)
CallingConv.Invalid = CallingConv(100)
CallingConv.Unexposed = CallingConv(200)

# CursorKind -> handler function, filled in by the ``handle`` decorator.
handlers = {}

idati = idaapi.get_idati()
# idati = idaapi.til_t()

# Holds the last clang type that needed special attention; inspect it from
# the IDA console when something looks wrong.
debug = None

# BADADDR is 2**64 - 1 only when the running IDA uses 64-bit addresses.
_IS_64BIT = idaapi.BADADDR == 2 ** 64 - 1

if _IS_64BIT:
    FF_POINTER = FF_QWORD
    POINTER_SIZE = 8
else:
    FF_POINTER = FF_DWORD
    POINTER_SIZE = 4


def preprocess(dict):
    """Turn ``{kind: (flags, c_decl)}`` into ``{kind: (flags, serialized)}``.

    The flags are masked to 32 bits. ``c_decl`` is replaced by element 1 of
    ``parse_decl(c_decl, 0)``; ``None`` is kept as ``None`` and ``'void'``
    maps to the literal ``b'\\x01'`` instead of being parsed.
    """
    def serialize(decl):
        if decl is None:
            return None
        if decl == 'void':
            return b'\x01'
        return parse_decl(decl, 0)[1]

    return {
        key: (ida_type & 0xffffffff, serialize(decl))
        for key, (ida_type, decl) in dict.items()
    }


def _size_to_flags(size):
    """Return the FF_* data flag for an integer of ``size`` bytes.

    Raises KeyError for sizes other than 1, 2, 4 and 8.
    """
    return {
        1: FF_BYTE,
        2: FF_WORD,
        4: FF_DWORD,
        8: FF_QWORD,
    }[size]


# clang TypeKind -> (IDA data flags, serialized IDA type or None).
builtin_types = preprocess({
    TypeKind.RECORD: (FF_STRUCT, None),
    TypeKind.ENUM: (FF_DWORD, 'int'),
    TypeKind.BOOL: (_size_to_flags(idati.cc.size_b), 'bool'),
    TypeKind.DOUBLE: (FF_DOUBLE, 'double'),
    TypeKind.LONGDOUBLE: (FF_DOUBLE, 'double'),
    TypeKind.FLOAT: (FF_FLOAT, 'float'),
    TypeKind.WCHAR: (FF_WORD, 'unsigned short'),
    TypeKind.CHAR16: (FF_WORD, 'unsigned short'),
    TypeKind.CHAR32: (FF_DWORD, 'unsigned int'),
    TypeKind.SHORT: (_size_to_flags(idati.cc.size_s), 'short'),
    TypeKind.USHORT: (_size_to_flags(idati.cc.size_s), 'unsigned short'),
    TypeKind.INT: (_size_to_flags(idati.cc.size_i), 'int'),
    TypeKind.LONG: (_size_to_flags(idati.cc.size_l), 'long'),
    TypeKind.LONGLONG: (_size_to_flags(idati.cc.size_ll), 'long long'),
    TypeKind.UINT: (_size_to_flags(idati.cc.size_i), 'unsigned int'),
    TypeKind.ULONG: (_size_to_flags(idati.cc.size_l), 'unsigned long'),
    TypeKind.ULONGLONG: (_size_to_flags(idati.cc.size_ll),
                         'unsigned long long'),
    TypeKind.CHAR_S: (FF_BYTE, 'signed char'),
    TypeKind.CHAR_U: (FF_BYTE, 'unsigned char'),
    TypeKind.SCHAR: (FF_BYTE, 'signed char'),
    TypeKind.UCHAR: (FF_BYTE, 'unsigned char'),
    TypeKind.INT128: (FF_OWORD, '__int128'),
    TypeKind.UINT128: (FF_OWORD, 'unsigned __int128'),
    TypeKind.VOID: (FF_0VOID, 'void'),
    TypeKind.POINTER: (FF_0OFF | FF_POINTER, None),
    TypeKind.LVALUEREFERENCE: (FF_0OFF | FF_POINTER, None),
})

# clang calling convention -> IDA CM_CC_* constant; anything missing falls
# back to cdecl in ``resolve_function``.
callingconv_map = {
    CallingConv.C: idaapi.CM_CC_CDECL,
    CallingConv.X86FastCall: idaapi.CM_CC_FASTCALL,
    CallingConv.X86ThisCall: idaapi.CM_CC_THISCALL,
    CallingConv.X86StdCall: idaapi.CM_CC_STDCALL,
    CallingConv.X86Pascal: idaapi.CM_CC_PASCAL,
}

# type name -> {'bases', 'is_typedef', 'resolved'} for every type seen so far.
visited = dict()
# type name -> list of virtual-method cursors declared for that type.
virtuals_mapping = dict()


def handle(kind):
    """Decorator factory: register the function as handler for ``kind``."""
    def decorator(f):
        handlers[kind] = f
        return f
    return decorator


@handle(CursorKind.ENUM_DECL)
def handle_enum(item, context):
    """Create an IDA enum named after ``item`` with all of its enumerators."""
    members = [(member.spelling, member.enum_value)
               for member in item.get_children()]
    enum_id = add_enum(idaapi.BADADDR, item.spelling, 0)
    for name, value in members:
        add_enum_member(enum_id, name, value, -1)


class Struct:
    """Builder around an IDA ``udt_type_data_t`` saved as a named type."""

    def __init__(self, name, is_union, flags=0):
        """Create the (still empty) type and save it under ``name``.

        ``flags`` is stored in ``udt.taudt_bits``.
        """
        self.is_union = is_union
        self.ti = idaapi.tinfo_t()
        self.udt = idaapi.udt_type_data_t()
        self.udt.taudt_bits = flags
        self.name = name
        self.save(True)

    def add_member(self, name, offset, flag, size, tif):
        """Append a member; ``flag`` is accepted but not used here.

        Members whose name ends with ``_vftable`` are marked as vftable
        pointers.
        """
        member = idaapi.udt_member_t()
        member.offset = offset
        member.name = name
        member.size = size
        member.type = tif
        if name.endswith('_vftable'):
            member.set_vftable()
        self.udt.push_back(member)

    def set_align(self, align):
        """Store ``align`` in ``udt.effalign``."""
        self.udt.effalign = align

    def save(self, replace=True):
        """Build the udt and store it in ``idati``; return the tinfo.

        On a name conflict (only tolerated when ``replace`` is false) the
        name is prefixed with ``_`` and the save is retried; ``self.name``
        ends up holding the name that was finally used.

        Raises Exception when IDA rejects the name or reports any other
        error code.
        """
        name = self.name
        # Fix: the original read a bare ``is_union`` here, which is not the
        # instance attribute set in __init__.
        udt_kind = idaapi.BTF_UNION if self.is_union else idaapi.BTF_STRUCT
        ntf_flags = idaapi.NTF_REPLACE if replace else 0
        while True:
            self.ti.create_udt(self.udt, udt_kind)
            res = self.ti.set_named_type(idati, name, ntf_flags)
            if res == idaapi.TERR_OK:
                break
            elif res == idaapi.TERR_SAVE:
                # name conflict
                assert not replace, '?!'
                name = '_' + name
            elif res == idaapi.TERR_WRONGNAME:
                raise Exception('not allowed name: %r' % name)
            else:
                # Retrying with identical input would loop forever.
                raise Exception(
                    'failed to save type %r (error %r)' % (name, res))
        self.name = name
        return self.ti


def is_primitive(kind):
    """Return the serialized builtin type for ``kind``, or a falsy value.

    Kinds missing from ``builtin_types`` yield ``False``; kinds present
    without a serialized type (e.g. RECORD, POINTER) yield ``None``.
    """
    if kind not in builtin_types:
        return False
    return builtin_types[kind][1]


def resolve_pointer(type, context):
    """Return an IDA pointer tinfo for the clang pointer/reference ``type``."""
    tif = idaapi.tinfo_t()
    pointee = type.get_pointee()
    _register_type(pointee, context)
    pointee_type = idaapi.tinfo_t()
    if pointee.kind in (TypeKind.UNEXPOSED,):
        pointee = pointee.get_canonical()

    if pointee.kind in (TypeKind.TYPEDEF, TypeKind.POINTER,
                        TypeKind.LVALUEREFERENCE, TypeKind.ELABORATED):
        pointee_type = _register_type(pointee, context)
    elif pointee.kind == TypeKind.INVALID:
        pointee_type.deserialize(
            idati, builtin_types[TypeKind.VOID][1], b"")
    elif pointee.kind == TypeKind.FUNCTIONPROTO or is_primitive(pointee.kind):
        pointee_type = _register_type(pointee, context)
    else:
        name = pointee.spelling
        # Look the name up through the enclosing namespaces; fall back to a
        # forward declaration when it is not known yet.
        if not context.resolve(
                name, lambda name: pointee_type.get_named_type(idati, name)):
            pointee_type.create_forward_decl(idati, BTF_STRUCT, name)

    if pointee_type is None:
        # _register_type returns None for types it could not (yet) resolve.
        pointee_type = idaapi.tinfo_t()
        # Keep the call outside the assert so it still runs under ``-O``.
        created = pointee_type.create_forward_decl(
            idati, BTF_STRUCT, pointee.spelling)
        assert created
    tif.create_ptr(pointee_type)
    return tif


def _make_vtable(name, virtuals, context):
    """Create the ``<name>_vftable`` struct and return a fake pointer type.

    The returned object mimics the few clang ``Type`` methods used when
    struct members are laid out (``get_size``, ``get_canonical``,
    ``get_pointee``, ``kind``, ``spelling``).
    """
    # Creates a special struct(record) for vtable
    class FakeType(object):
        def __init__(self, kind, spelling, size=POINTER_SIZE, pointee=None):
            self.kind = kind
            self.spelling = spelling
            self.size = size
            self.pointee = pointee

        def get_size(self):
            return self.size

        def get_canonical(self):
            return self

        def get_pointee(self):
            return self.pointee

    vtable_name = "%s_vftable" % (name)
    struct = Struct(vtable_name, False, idaapi.TAUDT_VFTABLE)
    for i, func in enumerate(virtuals):
        size = POINTER_SIZE
        flag = FF_POINTER | FF_0OFF
        member_name = '%s' % (func.spelling)
        tif = resolve_function(func.type, context, class_=name)
        tif.create_ptr(tif)
        struct.add_member(member_name, i * POINTER_SIZE, flag, size, tif)
    # Mark the vtable as visited so _register_type never tries to lay out
    # the FakeType record below.
    visited[vtable_name] = {
        'bases': [],
        'is_typedef': False,
        'resolved': None,
    }
    struct.save()
    return FakeType(TypeKind.POINTER, vtable_name + " *",
                    pointee=FakeType(TypeKind.RECORD, vtable_name))


def resolve_function(type, context, flags=0, class_=None):
    """Return an IDA function tinfo for the clang function ``type``.

    When ``class_`` is given, a leading pointer-to-``class_`` argument is
    added and (for non-variadic functions) thiscall is used.
    """
    func = idaapi.tinfo_t()
    data = idaapi.func_type_data_t()
    data.flags = flags
    data.rettype = _register_type(type.get_result(), context)
    data.stkargs = 0
    data.spoiled.clear()
    data.clear()
    cc = CallingConv.from_id(conf.lib.clang_getFunctionTypeCallingConv(type))
    # ida only supports cdecl + ellipsis when varargs exists
    if type.is_function_variadic():
        data.cc = idaapi.CM_CC_ELLIPSIS
    elif class_:
        # you can use one of these
        data.cc = idaapi.CM_CC_THISCALL
        # data.cc = idaapi.CM_CC_FASTCALL
    else:
        data.cc = callingconv_map.get(cc, idaapi.CM_CC_CDECL)

    if class_:
        funcarg = idaapi.funcarg_t()
        class_type_ = _create_forward_declaration(class_)
        class_type_.create_ptr(class_type_)
        funcarg.type = class_type_
        data.push_back(funcarg)
    for argument in type.argument_types():
        funcarg = idaapi.funcarg_t()
        funcarg.type = _register_type(argument, context)
        data.push_back(funcarg)
    func.create_func(data)
    func.get_func_details(data)
    return func


def _create_forward_declaration(typename):
    """Return the named type if IDA knows it, else a forward declaration."""
    tif = idaapi.tinfo_t()
    if tif.get_named_type(idati, typename):
        return tif
    tif.create_forward_decl(idati, BTF_STRUCT, typename)
    return tif


def _base_key(base_type, context):
    """Key of ``base_type`` in ``virtuals_mapping`` (template args dropped)."""
    return context.name(base_type.spelling.split('<')[0])


def _register_record(type, context, typename, result, bases, virtuals):
    """Lay out a clang RECORD type as an IDA struct/union and save it."""
    global debug
    align = type.get_align()
    field_offsets = [x.get_field_offsetof() for x in type.get_fields()]
    unique_offsets = set(field_offsets)
    if unique_offsets == {-2}:
        # Every field reports -2: the offsets are unusable, treat as struct.
        is_union = False
    else:
        # Several fields sharing one single offset -> union.
        is_union = len(unique_offsets) == 1 and len(field_offsets) != 1

    if type.get_size() == -2:
        # forward declaration
        tif = _create_forward_declaration(typename)
        del visited[typename]
        result['resolved'] = tif
        return tif

    # Entries are (offset in bits, clang type or FakeType, member name).
    members = []
    offset = 0
    # populate_bases(members, base)
    delta = 0  # never changed; kept because the debug print reports it
    has_virtuals = any(
        virtuals_mapping[_base_key(base.type, context)] for base in bases
    ) or bool(virtuals)

    for i, base in enumerate(bases):
        base_type = base.type
        base_size = base_type.get_size()
        base_align = base_type.get_align()
        base_virtuals = virtuals_mapping[_base_key(base_type, context)]
        # When this class has virtuals but its first base has none, the
        # base fields are shifted by one pointer (room for the vtable).
        shift_first = has_virtuals and not base_virtuals and i == 0
        vtable_delta = POINTER_SIZE if shift_first else 0
        for member in base_type.get_fields():
            members.append((
                vtable_delta * 8 + offset
                + member.get_field_offsetof() - delta * 8,
                member.type,
                'base%d_%s' % (i, member.spelling)))
            print(delta, offset // 8, member.get_field_offsetof() // 8)
        if i:
            # Every base after the first gets its own vtable pointer.
            members.insert(0, (
                offset,
                _make_vtable(context.name(base.spelling),
                             base_virtuals, context),
                "base%d__vftable" % i))
        _register_type(base_type, context)
        offset += (base_size + base_align - 1) // base_align * base_align * 8
        if not i:
            # The primary vtable starts with the first base's virtuals.
            virtuals = base_virtuals + virtuals

    # Drop overrides: keep the first method per (name, signature) pair.
    seen = set()
    unique_virtuals = []
    for method in virtuals:
        key = (method.spelling, method.type.spelling)
        if key not in seen:
            seen.add(key)
            unique_virtuals.append(method)
    virtuals = unique_virtuals

    if virtuals:
        members.insert(
            0, (0, _make_vtable(typename, virtuals, context), "__vftable"))
    virtuals_mapping[typename] = virtuals

    for member in type.get_fields():
        if member.is_bitfield():
            continue
        if member.kind == CursorKind.FIELD_DECL:
            members.append((member.get_field_offsetof(),
                            member.type, member.spelling))

    struc = Struct(typename, is_union,
                   idaapi.TAUDT_CPPOBJ if virtuals else 0)
    struc.set_align(align.bit_length() - 1)
    for bit_offset, member_type, name_ in members:
        size = member_type.get_size()
        if not name_:
            name_ = '__offset%x' % (bit_offset >> 3)
        if size < 0:
            if member_type.kind == TypeKind.INCOMPLETEARRAY:
                # later fixed to array
                size = member_type.element_type.get_size()
            else:
                # Fix: the original had a bare undefined name here, which
                # raised NameError before its ``continue`` could run.
                debug = member_type
                print('skipping member %r of %r: unknown size'
                      % (name_, typename))
                continue
        flag = 0
        canonical = member_type.get_canonical()
        if canonical.kind == TypeKind.RECORD:
            tif = _register_type(canonical, context)
        elif canonical.kind in (TypeKind.POINTER, TypeKind.LVALUEREFERENCE):
            tif = resolve_pointer(canonical, context)
        else:
            if canonical.kind in builtin_types:
                flag |= builtin_types[canonical.kind][0]
            tif = _register_type(canonical, context)
        if bit_offset % 8:
            # Members that do not start on a byte boundary are dropped.
            continue
        struc.add_member(name_, bit_offset >> 3, flag, size, tif)
    tif = result['resolved'] = struc.save()
    return tif


def _register_type(type, context, bases=None, virtuals=None):
    """Convert the clang ``type`` into an IDA tinfo, caching by name.

    ``bases`` and ``virtuals`` are the base-specifier and virtual-method
    cursors of a record (see ``handle_struct``). Returns the tinfo, or
    ``None`` when the kind is not handled or the type is still being
    resolved further up the call stack.
    """
    global debug
    bases = [] if bases is None else bases
    virtuals = [] if virtuals is None else virtuals

    typename = context.name(type.spelling)
    found = visited.get(typename)
    if found:
        return found['resolved']
    result = {
        'bases': bases,
        'is_typedef': type.kind == TypeKind.TYPEDEF,
        'resolved': None,
    }
    if type.kind == TypeKind.UNEXPOSED:
        type = type.get_canonical()
    if type.kind not in (TypeKind.TYPEDEF, TypeKind.ELABORATED):
        # Registered before recursing so self-referential types terminate.
        visited[typename] = result

    if type.kind == TypeKind.ELABORATED:
        result['resolved'] = tif = idaapi.tinfo_t()
        tif.create_typedef(idati, typename, BTF_STRUCT)
        visited[typename] = result
        return tif

    if type.kind == TypeKind.VARIABLEARRAY:
        tif = idaapi.tinfo_t()
        tif.create_ptr(_register_type(type.element_type, context))
        result['resolved'] = tif
        return tif

    if type.kind in (TypeKind.CONSTANTARRAY, TypeKind.VECTOR,
                     TypeKind.INCOMPLETEARRAY):
        if type.kind != TypeKind.INCOMPLETEARRAY:
            count = type.element_count
        else:
            count = 1
        tif = idaapi.tinfo_t()
        debug = type
        tif.create_array(_register_type(type.element_type, context), count)
        result['resolved'] = tif
        return tif

    if is_primitive(type.kind):
        tif = idaapi.tinfo_t()
        # Keep the call outside the assert so it still runs under ``-O``.
        ok = tif.deserialize(idati, builtin_types[type.kind][1], b"")
        assert ok
        result['resolved'] = tif
        return tif

    if type.kind == TypeKind.FUNCTIONPROTO:
        result['resolved'] = tif = resolve_function(type, context)
        return tif

    if type.kind in (TypeKind.POINTER, TypeKind.LVALUEREFERENCE):
        result['resolved'] = tif = resolve_pointer(type, context)
        return tif

    if type.kind == TypeKind.TYPEDEF:
        canonical = type.get_canonical()
        original = canonical.spelling
        # if original == typename:
        #     del visited[typename]
        tif = _register_type(canonical, context)
        if original != typename and tif:
            tif.set_named_type(idati, typename, NTF_TYPE)
            result['resolved'] = tif
            visited[typename] = result
            # Let the alias share the virtual-method list of its target.
            origkey = typename.split("<")[0]
            target = canonical.spelling.split("<")[0]
            if target in virtuals_mapping:
                virtuals_mapping[origkey] = virtuals_mapping[target]
        else:
            debug = type
        return tif

    if type.kind == TypeKind.RECORD:
        return _register_record(
            type, context, typename, result, bases, virtuals)

    return None


@handle(CursorKind.CLASS_DECL)
@handle(CursorKind.CLASS_TEMPLATE)
@handle(CursorKind.STRUCT_DECL)
@handle(CursorKind.UNION_DECL)
def handle_struct(item, context):
    """Collect bases and virtual methods of a record, then register it."""
    # Is there any way to check if it's forward declaration or not?
    if len(list(item.get_children())) == 0 and item.type.get_size() > 1:
        # forward class/struct declaration
        return
    bases = []
    virtuals = []
    # Published before the members are scanned; the same list object is
    # filled in below.
    virtuals_mapping[context.name(item.spelling)] = virtuals
    method_kinds = (CursorKind.CXX_METHOD, CursorKind.DESTRUCTOR,
                    CursorKind.CONSTRUCTOR)
    for member in item.get_children():
        if member.kind == CursorKind.CXX_BASE_SPECIFIER:
            bases.append(member)
        elif member.kind in method_kinds and member.is_virtual_method():
            virtuals.append(member)
    _register_type(item.type, context, bases, virtuals)


@handle(CursorKind.TYPEDEF_DECL)
@handle(CursorKind.TYPE_ALIAS_DECL)
@handle(CursorKind.FUNCTION_DECL)
@handle(CursorKind.VAR_DECL)
def typedefs(item, context):
    """Register the cursor's type; for functions and variables also apply
    it to the address IDA knows under the same name, if any."""
    tif = _register_type(item.type, context)
    if item.kind not in (CursorKind.FUNCTION_DECL, CursorKind.VAR_DECL):
        return
    if tif is None:
        # Unresolved type: nothing to apply.
        return
    address = get_name_ea_simple(item.spelling)
    if address != idaapi.BADADDR:
        idaapi.apply_tinfo(
            address, tif, idaapi.TINFO_DELAYFUNC | idaapi.TINFO_DEFINITE)


@handle(CursorKind.NAMESPACE)
def namespace(item, context):
    """Process the namespace's children in a nested naming context."""
    process_cursor(item, context.nest_namespace(item.spelling))


@handle(CursorKind.LINKAGE_SPEC)
@handle(CursorKind.UNEXPOSED_DECL)
def linkage(item, context):
    """Process the children of a linkage spec / unexposed declaration."""
    process_cursor(item, context)


def parse_file(path, args=None):
    """Parse the source file ``path`` with clang and import its types.

    ``args`` are extra clang command-line arguments. ``-m64`` or ``-m32``
    is prepended to match the running IDA; the caller's list is not
    modified.
    """
    # Fix: the original inserted the flag only after parsing, so clang
    # never saw it, and it mutated the shared default list.
    arch_flag = '-m64' if _IS_64BIT else '-m32'
    clang_args = [arch_flag] + list(args or [])
    index = Index.create()
    tx = index.parse(path, clang_args)
    process_cursor(tx.cursor)


def parse_ast(path, args=None):
    """Load the serialized clang AST file ``path`` and import its types.

    ``args`` is accepted for symmetry with ``parse_file`` but is not used:
    nothing is passed to clang when loading an AST file.
    """
    index = Index.create()
    tx = TranslationUnit.from_ast_file(path, index)
    process_cursor(tx.cursor)


class Context(object):
    """Namespace stack used to build and look up qualified type names."""

    def __init__(self, namespaces=None):
        self.namespaces = [] if namespaces is None else namespaces

    def nest_namespace(self, namespace):
        """Return a new Context one namespace deeper; self is unchanged."""
        return Context(namespaces=self.namespaces + [namespace])

    def name(self, name):
        """Return ``name`` sanitized and qualified with this namespace."""
        return Context._generate_name(name, self.namespaces)

    @staticmethod
    def _generate_name(name, namespaces):
        """Strip leading qualifiers/keywords, replace unsupported characters
        with ``_`` and prefix ``namespaces`` unless already present."""
        name = re.sub(
            r'^(const |volatile |struct |union |class )+', '', name)
        name = re.sub(r'[^a-zA-Z0-9:<>=]', '_', name)
        if not namespaces:
            return name
        prefix = "::".join(namespaces) + "::"
        if name.startswith(prefix):
            return name
        return "%s%s" % (prefix, name)

    def resolve(self, name, predicate):
        """Try ``predicate`` on ``name`` qualified by each enclosing scope.

        Scopes are tried from the innermost namespace out to the global
        scope. Returns the first truthy predicate result, else ``False``.
        """
        # Fix: the original sliced ``self.namespaces[:-i]``, which is empty
        # for i == 0, so the innermost scope was never tried.
        for depth in range(len(self.namespaces), -1, -1):
            found = predicate(
                Context._generate_name(name, self.namespaces[:depth]))
            if found:
                return found
        return False


def process_cursor(cursor, context=None):
    """Dispatch every child of ``cursor`` to its registered handler.

    Children whose kind has no handler are only logged.
    """
    if context is None:
        context = Context()
    for item in cursor.get_children():
        location = item.location
        # Guard against a location without a file.
        source_file = location.file
        file_name = source_file.name if source_file is not None else None
        print(file_name, location.line, item.kind, item.spelling)
        handler = handlers.get(item.kind)
        if handler is not None:
            handler(item, context)