Last active
March 8, 2024 12:04
-
-
Save LeadroyaL/80a5f6fbb83ee1c102c860aaf2bc594d to your computer and use it in GitHub Desktop.
Unicorn实战(一):去掉libcms.so的花指令
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
from capstone import Cs, CS_ARCH_ARM, CS_MODE_THUMB, CsInsn
from elftools.elf.constants import P_FLAGS
from elftools.elf.elffile import ELFFile
from keystone import Ks, KS_MODE_THUMB, KS_ARCH_ARM
from unicorn import Uc, UcError, UC_ARCH_ARM, UC_MODE_LITTLE_ENDIAN, UC_HOOK_CODE, UC_PROT_READ, UC_PROT_WRITE, UC_PROT_EXEC
from unicorn.arm_const import *
# Locate the .text section and read its raw bytes.
filename = "./libcms.so"
# The handle is kept open on purpose: the ELFFile object keeps reading from
# it when the loadable segments are fetched further down.
fd = open(filename, 'rb')
elf = ELFFile(fd)
text_header = elf.get_section_by_name(".text").header
sh_offset, sh_size = text_header['sh_offset'], text_header['sh_size']
fd.seek(sh_offset)
text_data = fd.read(sh_size)
# Find every Thumb sequence of the form
#     PUSH {...}; MOV Rx, PC; MOV Ry, PC
# Only Thumb code is scanned.
cs = Cs(CS_ARCH_ARM, CS_MODE_THUMB)
# Entries are later used as emulation start addresses, so they must be
# virtual addresses (sh_addr) rather than file offsets (sh_offset).
text_addr = elf.get_section_by_name(".text").header['sh_addr']
# List of (address, push_regs) tuples.
entries = []
# The section is disassembled in fixed-size windows, so a run of undecodable
# bytes only costs the remainder of one window.
step = 1000
# Each window starts a few bytes before its nominal start so that a pattern
# straddling a window boundary is still seen in full by the next window.
overlap = 10
# Addresses already recorded; the overlap can show the same pattern twice.
seen = set()


def _is_mov_from_pc(insn):
    """Return True for a mov-family instruction whose last operand is pc."""
    return insn.mnemonic.startswith('mov') and insn.op_str.endswith('pc')


for i in range(0, len(text_data), step):
    _i = max(0, i - overlap)
    # The window ends at i + step (not _i + step) so consecutive windows
    # really do overlap by `overlap` bytes.
    insns = list(cs.disasm(text_data[_i:i + step], 0))
    # Slide over every instruction triple, so a failed partial match never
    # hides a push that immediately follows it.
    for ins, ins2, ins3 in zip(insns, insns[1:], insns[2:]):
        # push {rx, ry, ...}
        if ins.mnemonic != 'push':
            continue
        # mov rx, pc ; mov ry, pc
        if not (_is_mov_from_pc(ins2) and _is_mov_from_pc(ins3)):
            continue
        address = _i + text_addr + ins.address
        if address in seen:
            continue
        seen.add(address)
        entries.append((address, ins.op_str))
print(entries)
# 加载 so 到内存中 | |
def align(addr, size, align):
    """Expand the range [addr, addr + size) outward to multiples of *align*.

    Args:
        addr: Start address of the range.
        size: Length of the range in bytes.
        align: Alignment boundary. Values of 0 or 1 mean "no alignment",
            in which case the range is returned unchanged.

    Returns:
        A ``(start, length)`` tuple: *addr* rounded down to the boundary and
        the length needed to reach ``addr + size`` rounded up to the boundary.
    """
    # ELF allows p_align to be 0 or 1 when no alignment is required; treat
    # both as a boundary of 1 instead of dividing by zero.
    boundary = align if align > 1 else 1
    fr_addr = addr // boundary * boundary
    to_addr = (addr + size + boundary - 1) // boundary * boundary
    return fr_addr, to_addr - fr_addr
def pflags2prot(p_flags):
    """Convert ELF segment permission bits into a Unicorn UC_PROT_* bitmask.

    Each of PF_R / PF_W / PF_X that is set in *p_flags* contributes the
    matching UC_PROT_READ / UC_PROT_WRITE / UC_PROT_EXEC bit to the result.
    """
    flag_to_prot = (
        (P_FLAGS.PF_R, UC_PROT_READ),
        (P_FLAGS.PF_W, UC_PROT_WRITE),
        (P_FLAGS.PF_X, UC_PROT_EXEC),
    )
    prot = 0
    for elf_flag, uc_prot in flag_to_prot:
        if p_flags & elf_flag:
            prot |= uc_prot
    return prot
# Map every PT_LOAD segment into the emulator at load_base + p_vaddr.
load_base = 0
emu = Uc(UC_ARCH_ARM, UC_MODE_LITTLE_ENDIAN)
load_segments = list(filter(lambda seg: seg.header.p_type == 'PT_LOAD', elf.iter_segments()))
for segment in load_segments:
    seg_header = segment.header
    seg_vaddr = load_base + seg_header.p_vaddr
    # Map the aligned range that covers the segment's in-memory size, then
    # copy the segment's file contents to its exact (unaligned) address.
    fr_addr, size = align(seg_vaddr, seg_header.p_memsz, seg_header.p_align)
    emu.mem_map(fr_addr, size, pflags2prot(seg_header.p_flags))
    emu.mem_write(seg_vaddr, segment.data())
# Enter each candidate entry in turn and stop as soon as the stack is
# balanced again.
STACK_ADDR = 0x7F000000
STACK_SIZE = 1024 * 1024
# Address of the entry currently being emulated; set before each run.
start_addr = None


def hook_code(mu: Uc, address, size, user_data):
    """Per-instruction hook that stops emulation once the stack is balanced.

    Emulation is stopped when SP is back at the top of the stack
    (STACK_ADDR + STACK_SIZE) and PC is no longer at the entry address,
    where SP still holds its initial value.
    """
    if mu.reg_read(UC_ARM_REG_PC) == start_addr:
        return
    if mu.reg_read(UC_ARM_REG_SP) == STACK_ADDR + STACK_SIZE:
        # Stop the engine that invoked this hook rather than relying on the
        # module-level `emu` global.
        mu.emu_stop()


emu.mem_map(STACK_ADDR, STACK_SIZE)
emu.hook_add(UC_HOOK_CODE, hook_code)
# Register names as they appear in a disassembled push list, mapped to the
# corresponding Unicorn register ids.
_to_reg_id = dict(
    r0=UC_ARM_REG_R0,
    r1=UC_ARM_REG_R1,
    r2=UC_ARM_REG_R2,
    r3=UC_ARM_REG_R3,
    r4=UC_ARM_REG_R4,
    r5=UC_ARM_REG_R5,
    r6=UC_ARM_REG_R6,
    r7=UC_ARM_REG_R7,
    r8=UC_ARM_REG_R8,
    r9=UC_ARM_REG_R9,
    r10=UC_ARM_REG_R10,
    r11=UC_ARM_REG_R11,
    r12=UC_ARM_REG_R12,
    r13=UC_ARM_REG_R13,
    r14=UC_ARM_REG_R14,
    r15=UC_ARM_REG_R15,
    lr=UC_ARM_REG_LR,
    pc=UC_ARM_REG_PC,
    sp=UC_ARM_REG_SP,
    sb=UC_ARM_REG_SB,
    sl=UC_ARM_REG_SL,
    fp=UC_ARM_REG_FP,
    ip=UC_ARM_REG_IP,
)
# Emulate each candidate. A candidate is a match when, after the run, the
# stack is balanced again and every pushed register still holds the marker
# value. Matches are collected as (start_addr, stop_addr) pairs.
ret = []
MAGIC32 = 0x12345678
stack_top = STACK_ADDR + STACK_SIZE
for push_entry, push_regs in entries:
    # push_regs looks like "{r4, r5, lr}"; resolve it to register ids once.
    reg_ids = [_to_reg_id[r] for r in push_regs.strip('{}').replace(' ', '').split(',')]
    emu.reg_write(UC_ARM_REG_SP, stack_top)
    print("Emulate arm code start", hex(push_entry))
    start_addr = push_entry
    for reg_id in reg_ids:
        emu.reg_write(reg_id, MAGIC32)
    try:
        # Bit 0 set selects Thumb mode; the run is capped at 100 instructions.
        emu.emu_start(push_entry + 1, 0, 0, 100)
    except UcError as err:
        # A false-positive entry may fault (for example on unmapped memory);
        # report it and move on instead of aborting the whole scan.
        print("Cannot handle:", start_addr, err)
        continue
    print("Emulation arm code done")
    # If the instruction cap ended the run, the stack hook never fired and
    # SP is not back at the top; such a run must not count as a match.
    balanced = emu.reg_read(UC_ARM_REG_SP) == stack_top
    changed = any(emu.reg_read(reg_id) != MAGIC32 for reg_id in reg_ids)
    stop_addr = emu.reg_read(UC_ARM_REG_PC)
    if balanced and not changed:
        print("Match:", start_addr, stop_addr)
        ret.append((start_addr, stop_addr))
    else:
        print("Cannot handle:", start_addr)
fd.close()
print(ret)
# [(55302, 55386), (55390, 55474), (55538, 55624), (55916, 56002), (56006, 56090), (56114, 56200), (60314, 60398), (60780, 60866), (61258, 61342), (61346, 61432), (96392, 96476), (107254, 107338), (107412, 107498), (130468, 130552), (131490, 131574), (131578, 131664), (132818, 132902), (135238, 135324), (135456, 135542), (136624, 136710), (144270, 144354), (144434, 144520), (144856, 144940), (145070, 145156), (147232, 147316), (151298, 151382), (151512, 151598), (151662, 151748), (152022, 152106), (152110, 152196), (157910, 157996), (158170, 158256), (159260, 159346), (159348, 159434), (161294, 161380), (161476, 161562), (161574, 161660), (161806, 161890), (161930, 162016), (165348, 165434), (165504, 165588), (165600, 165686), (165756, 165840), (165852, 165938), (166008, 166092), (166104, 166190), (166260, 166344), (166356, 166442), (166512, 166596), (166608, 166694), (166764, 166848), (166860, 166946), (167016, 167100), (167112, 167198), (167268, 167352), (167364, 167450), (169646, 169732), (169744, 169830), (170418, 170504), (170518, 170604), (172880, 172964), (173540, 173626), (173638, 173724), (173732, 173816), (173820, 173906), (185340, 185424), (185568, 185652), (185830, 185914), (186568, 186654), (211188, 211272), (211418, 211504), (211550, 211634), (211680, 211766), (211790, 211874), (211880, 211968), (211974, 212058), (212068, 212154), (212162, 212246), (214604, 214690), (217610, 217694), (222522, 222608), (224688, 224772), (225110, 225194), (225280, 225366), (226396, 226482), (226680, 226766), (226874, 226960), (227020, 227104), (227172, 227258), (484228, 484314), (486500, 486586), (487556, 487642), (487696, 487782), (487828, 487914), (487916, 488000), (490978, 491064), (492458, 492544), (494114, 494200), (494444, 494528), (494568, 494654), (505414, 505498), (506140, 506226), (507860, 507944), (509882, 509968), (510050, 510136), (510540, 510624), (512724, 512810), (512858, 512942), (513104, 513188), (513282, 513366), (513406, 513490), (516024, 516108), 
(516152, 516238), (516476, 516562)] | |
# in idapython | |
# for start, stop in ret: | |
# ks = Ks(KS_ARCH_ARM, KS_MODE_THUMB) | |
# a = ks.asm("B.W $+" + str(stop - start)) | |
# PatchByte(start, a[0][0]) | |
# PatchByte(start + 1, a[0][1]) | |
# PatchByte(start + 2, a[0][2]) | |
# PatchByte(start + 3, a[0][3]) |
LeadroyaL,你好,我好像研究出来方法了,用 Edit -> Patch program -> Apply patches to input file 覆盖保存了 so 文件。但是同样遇到找不到 JNI_OnLoad 入口的情况,我用上面的脚本运行,但是报错:
for item in rst:
patch_in_ghidra(item) error: Traceback (most recent call last):
File "", line 1, in
ImportError: No module named ghidra.app.plugin.assembler
好像是ghidra.app.plugin.assembler包没有安装,但是我在网上也找不到这个包。你能告诉我如何安装吗?非常感谢。
@wqsui 你用的是 Ghidra 的脚本,作者用的是 IDA 的脚本。
Ghidra和ida都是反编译工具。Ghidra的脚本并不能在ida中运行
好的,非常感谢!
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
LeadroyaL,你好,我在ida里运行了python脚本,运行好没有报错,同时也没有反应。不知道新的SO文件在哪里?我用的是IDA 7.0版本,我的操作方法是:在IDA里打开Libcms.so,然后选择文件菜单->Scripting Command...,然后在Scripting language 选择 Python。然后运行如下代码:
from keystone import Ks, KS_MODE_THUMB, KS_ARCH_ARM
ret=[(13974, 14062), (14812, 14900), (15246, 15334), ...... 545662), (547720, 547808), (547858, 547946)]
#in idapython
for start, stop in ret:
ks = Ks(KS_ARCH_ARM, KS_MODE_THUMB)
a = ks.asm("B.W $+" + str(stop - start))
PatchByte(start, a[0][0])
PatchByte(start + 1, a[0][1])
PatchByte(start + 2, a[0][2])
PatchByte(start + 3, a[0][3])
运行好后IDA的Output Windows 里没有任何输出。修改后的SO文件怎么获得呢?非常感谢。