标题: Angr符号执行练习--对付ARM AARCH64 CFF 创建: 2025-09-29 10:45 更新: 2025-09-30 10:00 链接: https://scz.617.cn/unix/202509291045.txt -------------------------------------------------------------------------- 目录: ☆ 背景介绍 ☆ some_cff_patch.py ☆ 用GAMBA对付SUB -------------------------------------------------------------------------- ☆ 背景介绍 参看 OLLVM CFF去平坦化原理 - lyl610abc [2025-8-27] https://www.52pojie.cn/thread-2056288-1-1.html 上文提供了ARM AARCH64平台的CFF测试用例libmmap_exec.so,作者给了混淆前后的 so,据此可检查反CFF结果是否符合预期。我预处理过样本,得到更小的测试用例, foo()、bar()、baz()均有CFF。为什么这么干呢?因为Angr脚本处理原始so非常耗时, 具体是proj.analyses.CFG()这一步非常耗时,还会触发许多警告。裁剪后的so,可 让我们聚焦被混淆过的目标函数,快速完成符号执行。 IDA的D810插件可对付some_cff_small.so,既对付CFF也对付SUB。本文演示Angr符号 执行反AARCH64 CFF。SUB则用GAMBA手工化简之。 完整测试用例打包 https://scz.617.cn/unix/202509291045.txt https://scz.617.cn/unix/202509291045.7z ☆ some_cff_patch.py some_cff_patch.py实际源自 https://github.com/cq674350529/deflat/blob/master/flat_control_flow/deflat.py am_graph模块实际源自 https://github.com/angr/angr-management/blob/master/angrmanagement/utils/graph.py --------------------------------------------------------------------------
import sys, struct, collections
import angr, claripy, pyvex
import am_graph

def get_func_from_addr ( proj, addr ) :
    """Look up the function record for *addr* in ``proj.kb.functions``.

    Tries ``get_by_addr`` first; if that raises ``KeyError`` the result of
    ``floor_func( addr )`` is returned instead.
    """
    try :
        return proj.kb.functions.get_by_addr( addr )
    except KeyError :
        return proj.kb.functions.floor_func( addr )

def get_insns_of_node ( proj, node ) :
    """Return the capstone disassembly of *node* as a list.

    Reads ``node.size`` bytes at ``node.addr`` from the loader's memory and
    disassembles them starting at ``node.addr``.
    """
    buf = proj.loader.memory.load( node.addr, node.size )
    insns = list( proj.arch.capstone.disasm( buf, node.addr ) )
    return insns

def get_cond_jmp ( proj, node ) :
    """Return the last instruction of *node* if it is a conditional branch.

    An instruction counts as a conditional branch when its upper-cased
    mnemonic starts with ``B.`` or is one of CBZ / CBNZ / TBZ / TBNZ.
    Returns ``None`` otherwise.
    """
    ret = None
    CONDITIONAL_BRANCH_MNEMONICS \
        = { 'CBZ', 'CBNZ', 'TBZ', 'TBNZ' }
    insns = get_insns_of_node( proj, node )
    # Only the final instruction of the node is examined.
    insn = insns[-1]
    mnemonic = insn.mnemonic.upper()
    # 'B' and 'BL' never start with 'B.', so the second test is redundant
    # but harmless.
    is_b_cond = mnemonic.startswith( 'B.' ) and mnemonic not in ( 'B', 'BL' )
    is_other_cond \
        = mnemonic in CONDITIONAL_BRANCH_MNEMONICS
    if is_b_cond or is_other_cond :
        ret = insn
    return ret

def get_some_nodes_0 ( supergraph, threshold ) :
    """Locate the structural nodes of a flattened function and print them.

    One pass over ``supergraph.nodes()`` classifies nodes as follows:

    * prologue node: in-degree 0 (asserted to occur at most once);
    * return node: out-degree 0 and an empty ``out_branches`` (asserted to
      occur at most once);
    * pre-dispatcher node: out-degree 1, exactly one entry in
      ``out_branches`` and in-degree >= *threshold*; its single successor is
      recorded as the matching dispatcher node.

    Returns ``( prologue_node, retn_node, pre_dispatcher_nodes,
    dispatcher_nodes )``. Raises ``AssertionError`` when any of the four is
    missing or when a second prologue / return candidate is seen.
    """
    prologue_node = None
    retn_node = None
    pre_dispatcher_nodes \
        = []
    dispatcher_nodes \
        = []
    for node in supergraph.nodes() :
        if 0 == supergraph.in_degree( node ) :
            assert prologue_node is None
            prologue_node = node
        if 0 == supergraph.out_degree( node ) and len( node.out_branches ) == 0 :
            assert retn_node is None
            retn_node = node
        if 1 == supergraph.out_degree( node ) and \
           len( node.out_branches ) == 1 and \
           supergraph.in_degree( node ) >= threshold :
            pre_dispatcher_nodes.append( node )
            # The node has exactly one successor, so [0] is that successor.
            dispatcher_nodes.append( list( supergraph.successors( node ) )[0] )
    assert prologue_node is not None
    assert retn_node is not None
    assert pre_dispatcher_nodes
    assert dispatcher_nodes
    print( 'prologue_node: %#x' % prologue_node.addr )
    print( 'retn_node: %#x' % retn_node.addr )
    print( f'pre_dispatcher_nodes[{len(pre_dispatcher_nodes)}]:' )
    for i, node in enumerate( pre_dispatcher_nodes ) :
        print( f'[{i}] {node.addr:#x} ({node.size})' )
    print( f'dispatcher_nodes[{len(dispatcher_nodes)}]:' )
    for i, node in enumerate( dispatcher_nodes ) :
        print( f'[{i}] {node.addr:#x} ({node.size})' )
    return prologue_node, retn_node, pre_dispatcher_nodes, dispatcher_nodes

def get_some_nodes_1 ( proj, supergraph, pre_dispatcher_nodes, prologue_node, retn_node ) :
    """Split the remaining nodes into ``relevant_nodes`` and ``nop_nodes``.

    The prologue node, the return node, every pre-dispatcher node and every
    node without predecessors are skipped. A node is "relevant" when one of
    its predecessors ends in ``B.EQ`` and the node is ``successors[0]`` of
    that predecessor; every other visited node goes to ``nop_nodes``.

    Prints both lists and returns ``( relevant_nodes, nop_nodes )``. Raises
    ``AssertionError`` when no relevant node is found or when the parsed
    ``B.EQ`` target differs from ``successors[0].addr``.
    """
    relevant_nodes = []
    nop_nodes = []
    pre_dispatcher_addrs = {node.addr for node in pre_dispatcher_nodes}
    for node in supergraph.nodes() :
        if node.addr == prologue_node.addr or \
           node.addr == retn_node.addr or \
           node.addr in pre_dispatcher_addrs :
            continue
        predecessors = list( supergraph.predecessors( node ) )
        if not len( predecessors ) :
            continue
        for predecessor in predecessors :
            insn = get_cond_jmp( proj, predecessor )
            if insn is not None :
                mnemonic = insn.mnemonic.upper()
                if mnemonic == 'B.EQ' :
                    successors = list( supergraph.successors( predecessor ) )
                    # Drops the first character of op_str and parses the rest
                    # as hex; presumably op_str looks like '#0x...' - confirm.
                    target = int( insn.op_str[1:], 16 )
                    # NOTE(review): relies on successors[0] being the B.EQ
                    # target; the order comes from the graph - confirm.
                    assert successors[0].addr == target
                    if node.addr == successors[0].addr :
                        relevant_nodes.append( node )
        if node not in relevant_nodes :
            nop_nodes.append( node )
    assert relevant_nodes
    print( f'nop_nodes[{len(nop_nodes)}]:' )
    for i, node in enumerate( nop_nodes ) :
        print( f'[{i}] {node.addr:#x} - {node.addr+node.size:#x} ({node.size})' )
    print( f'relevant_nodes[{len(relevant_nodes)}]:' )
    for i, node in enumerate( relevant_nodes ) :
        print( f'[{i}] {node.addr:#x} - {node.addr+node.size:#x} ({node.size})' )
    return relevant_nodes, nop_nodes

def symbolic_execution ( proj, keep_blocks, start_addr, hook_addrs, set_value=None ) :
    """Execute from *start_addr* until a state lands on a kept block.

    Parameters:
        proj        -- the angr project.
        keep_blocks -- collection of addresses that end the search.
        start_addr  -- address of the blank state to start from.
        hook_addrs  -- addresses to hook with ``length=4`` before running;
                       may be empty.
        set_value   -- when not ``None``, the value forced into the condition
                       temp of the first ITE expression encountered.

    Returns the address of the first active state found in *keep_blocks*, or
    ``None`` once no active states remain.
    """

    def retn_procedure ( state ) :
        """Hook body: does nothing except remove the hook at ``state.addr``."""
        proj.unhook( state.addr )
        return

    def statement_inspect ( state ) :
        """Statement breakpoint: force the outcome of the first ITE seen."""
        expressions = list( state.scratch.irsb.statements[state.inspect.statement].expressions )
        if len( expressions ) != 0 and isinstance( expressions[0], pyvex.expr.ITE ) :
            # Overwrite the temp holding the ITE condition with set_value,
            # then clear the statement breakpoints so this fires only once.
            state.scratch.temps[expressions[0].cond.tmp] = set_value
            state.inspect._breakpoints['statement'] = []

    if hook_addrs :
        for addr in hook_addrs :
            proj.hook( addr, retn_procedure, length=4 )

    init_state = proj.factory.blank_state(
        addr = start_addr,
        add_options = {
            angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
            angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS,
            angr.options.BYPASS_UNSUPPORTED_SYSCALL,
        },
        remove_options = {
            angr.options.LAZY_SOLVES,
        }
    )
    if set_value is not None :
        init_state.inspect.b( 'statement', when=angr.BP_BEFORE, action=statement_inspect )
    sm = proj.factory.simulation_manager( init_state )
    # Step once before checking, so start_addr itself never matches even if
    # it is listed in keep_blocks.
    sm.step()
    while len( sm.active ) > 0 :
        for state in sm.active :
            if state.addr in keep_blocks :
                return state.addr
        sm.step()
    return None

def get_flow ( proj, prologue_node, relevant_nodes, retn_node ) :
    """Recover which kept block follows each kept block.

    The prologue node and every relevant node are executed with
    ``symbolic_execution``; the search stops at the address of any relevant
    node or of the return node. For a node containing an instruction whose
    mnemonic starts with ``csel`` (only the first one per node is recorded),
    execution is run twice, with ``claripy.BVV( 1, 1 )`` and then
    ``claripy.BVV( 0, 1 )``; other nodes are run once. Addresses of ``bl`` /
    ``blr`` instructions in the node are passed as hook addresses.

    Prints the kept addresses and the recovered flow. Returns
    ``( flow, ins_dict )``: *flow* maps a node to the list of next addresses
    found, *ins_dict* maps a node to its recorded ``csel`` instruction.
    """
    symbolic_execution_target \
        = [prologue_node]
    symbolic_execution_target.extend( relevant_nodes )
    keep_blocks = [node.addr for node in relevant_nodes]
    keep_blocks.extend( [retn_node.addr,] )
    print( f'keep_blocks[{len(keep_blocks)}]:' )
    for i, addr in enumerate( keep_blocks ) :
        print( f'[{i}] {addr:#x}' )
    # Converted to a set after printing, for membership tests during stepping.
    keep_blocks = set( keep_blocks )
    flow = collections.defaultdict( list )
    ins_dict = {}
    for node in symbolic_execution_target :
        block = proj.factory.block( node.addr, size=node.size )
        has_branch = False
        hook_addrs = set()
        for ins in block.capstone.insns :
            if ins.mnemonic.startswith( 'csel' ) :
                # Only the first csel of a node is recorded.
                if node not in ins_dict :
                    ins_dict[node] = ins
                    has_branch = True
            elif ins.mnemonic in { 'bl', 'blr' }:
                hook_addrs.add( ins.address )
        if has_branch :
            # Two runs, one per forced condition value; the order decides
            # which result becomes children[0] and which children[1].
            next_addr = symbolic_execution( proj, keep_blocks, node.addr, hook_addrs, claripy.BVV( 1, 1 ) )
            if next_addr is not None :
                flow[node].append( next_addr )
            next_addr = symbolic_execution( proj, keep_blocks, node.addr, hook_addrs, claripy.BVV( 0, 1 ) )
            if next_addr is not None :
                flow[node].append( next_addr )
        else :
            next_addr = symbolic_execution( proj, keep_blocks, node.addr, hook_addrs )
            if next_addr is not None :
                flow[node].append( next_addr )
    print( f'flow[{len(flow)}]:' )
    for i, ( k, v ) in enumerate( flow.items() ) :
        print( '[%d] %#x - %#x (%d) ->' % ( i, k.addr, k.addr+k.size, k.size ), [hex(child) for child in v] )
    return ( flow, ins_dict, )

# Condition-name -> numeric code table, plus the raw bytes used by fill_nop
# under the 'nop' key. Presumably the numbers are the AArch64 condition-field
# encodings - confirm against the (missing) body of get_j_ins.
OPCODES = {
    'eq' : 0x0,
    'ne' : 0x1,
    'hs' : 0x2,
    'lo' : 0x3,
    'mi' : 0x4,
    'pl' : 0x5,
    'vs' : 0x6,
    'vc' : 0x7,
    'hi' : 0x8,
    'ls' : 0x9,
    'ge' : 0xa,
    'lt' : 0xb,
    'gt' : 0xc,
    'le' : 0xd,
    'nop' : b'\x1f\x20\x03\xd5',
}

def fill_nop ( proj, buf, addr, size ) :
    """Overwrite *size* bytes of *buf* with 4-byte nop instructions.

    *addr* is translated to a file offset with
    ``proj.loader.main_object.addr_to_offset``. The nop bytes are reversed
    when ``proj.arch.memory_endness`` is ``"Iend_BE"``. Four bytes are written
    per iteration, so a *size* that is not a multiple of 4 writes past
    ``off + size``.
    """
    nop = OPCODES['nop']
    if proj.arch.memory_endness == "Iend_BE" :
        nop = nop[::-1]
    off = proj.loader.main_object.addr_to_offset( addr )
    for i in range( 0, size, 4 ) :
        buf[off+i] = nop[0]
        buf[off+i+1] = nop[1]
        buf[off+i+2] = nop[2]
        buf[off+i+3] = nop[3]

# NOTE(review): the listing is damaged from here to the line before dosth().
# The struct.pack() call below receives what is clearly a print-style format
# string, and patch_ins() / patch_buf() are called in this file but no `def`
# for either appears. Presumably the text between a "less-than" character in
# the struct format and the "greater-than" of a 'Patch %#x => %#x' message
# was stripped, taking the rest of get_j_ins, all of patch_ins and the head
# of patch_buf with it - confirm against the original script. The surviving
# fragment is kept verbatim on one line; it is not valid Python as shown.
def get_j_ins ( f_addr, t_addr, j_type ) : if 'b' == j_type : if f_addr > t_addr : j_ins = struct.pack( ' %#x' % ( addr, children[0] ) ) patch_ins( proj, buf, addr, j_ins ) else : ins = ins_dict[parent] j_ins = get_j_ins( ins.address, children[0], ins.op_str.split(',')[-1].strip() ) if proj.arch.memory_endness == "Iend_BE" : j_ins = j_ins[::-1] print( 'Patch %#x => %#x, %#x' % ( ins.address, children[0], children[1] ) ) patch_ins( proj, buf, ins.address, j_ins ) j_ins = get_j_ins( ins.address+4, children[1], 'b' ) if proj.arch.memory_endness == "Iend_BE" : j_ins = j_ins[::-1] patch_ins( proj, buf, ins.address+4, j_ins )

def dosth ( proj, buf, addr ) :
    """Run the whole recovery pipeline on the function at *addr*.

    Builds the supergraph of the function's transition graph, classifies its
    nodes (pre-dispatcher in-degree threshold is 4), recovers the flow
    between kept blocks and hands everything to ``patch_buf`` to modify
    *buf*. Progress is printed along the way.
    """
    print( f'func {addr:#x}' )
    func = get_func_from_addr( proj, addr )
    supergraph = am_graph.to_supergraph( func.transition_graph )
    # dispatcher_nodes is returned and printed but not used afterwards.
    prologue_node, retn_node, pre_dispatcher_nodes, dispatcher_nodes \
        = get_some_nodes_0( supergraph, 4 )
    print( "" )
    relevant_nodes, nop_nodes \
        = get_some_nodes_1( proj, supergraph, pre_dispatcher_nodes, prologue_node, retn_node )
    print( "" )
    flow, ins_dict = get_flow( proj, prologue_node, relevant_nodes, retn_node )
    print( "" )
    patch_buf( proj, buf, nop_nodes, flow, ins_dict )
    print( "" )

def main ( argv ) :
    """Patch the file ``argv[1]`` and write the result to ``argv[2]``.

    The input is loaded as an angr project at base address 0 without
    auto-loading libraries, a CFG analysis is run, and ``dosth`` is applied
    to three hard-coded function addresses. The patched buffer must keep the
    original length (asserted) before it is written out.
    """
    base_addr = 0
    proj = angr.Project(
        argv[1],
        load_options = {
            'auto_load_libs' : False,
            'main_opts' : { 'base_addr' : base_addr }
        }
    )
    # The result is not used directly; presumably the analysis is run for
    # its effect on proj.kb.functions, which get_func_from_addr reads.
    cfg = proj.analyses.CFG(
        force_smart_scan = False,
        force_complete_scan = True,
        normalize = True,
        resolve_indirect_jumps \
            = True,
        fail_fast = True
    )
    with open( argv[1], 'rb' ) as f :
        buf = bytearray( f.read() )
    origsize = len( buf )
    # The sample output in the article shows 0x51020 is foo(); the other two
    # are presumably bar() and baz() - confirm.
    addrlist = ( 0x51020, 0x51290, 0x514f0 )
    for addr in addrlist :
        dosth( proj, buf, addr )
    # Patching is in place only; the file size must not change.
    assert len( buf ) == origsize
    with open( argv[2], 'wb' ) as f :
        f.write( buf )

if "__main__" == __name__ :
    main( sys.argv )
-------------------------------------------------------------------------- 暂不清楚some_cff_small.so使用何种CFF工具生成,不能简单套用标准OLLVM CFF的 反混淆过程。针对此特例,可检查汇编指令,B.EQ指令的跳转目标即"有效块",另一 分支可舍弃。符号执行与以前的套路相同,x86需要Hook call,ARM需要Hook bl或 blr。恢复控制流与以前的套路相同,x86关注cmov,ARM关注csel。Patch时,对 len(children)为1的情形,检查parent最后一条指令,若非b指令,需Patch下一条指 令,避免破坏原有效代码,这是针对此特例的工程实践踩过的坑。 将来碰上其他ARM AARCH64样本,可在some_cff_patch.py基础上修改,一是寻找识别 "有效块"的办法,二是注意Patch时勿破坏原有效代码。细节部分需要具体样本具体 分析,整体思路不会有大变化。 some_cff_patch.py含有历史遗迹代码,出于某些个人考虑,未精简。 $ python3 some_cff_patch.py some_cff_small.so some_cff_new.so
输出较多,只展示foo()的信息 func 0x51020 prologue_node: 0x51020 retn_node: 0x51278 pre_dispatcher_nodes[2]: [0] 0x51288 (4) [1] 0x511ac (4) dispatcher_nodes[2]: // 实际未使用 [0] 0x51048 (24) [1] 0x510e4 (24) nop_nodes[10]: [0] 0x51048 - 0x51060 (24) [1] 0x51060 - 0x51078 (24) [2] 0x510e4 - 0x510fc (24) [3] 0x51078 - 0x51090 (24) [4] 0x510fc - 0x51114 (24) [5] 0x51090 - 0x510a8 (24) [6] 0x51114 - 0x5112c (24) [7] 0x510a8 - 0x510b0 (8) [8] 0x5112c - 0x51144 (24) [9] 0x51144 - 0x5114c (8) relevant_nodes[7]: [0] 0x510b0 - 0x510e4 (52) [1] 0x511d8 - 0x5124c (116) [2] 0x5114c - 0x51174 (40) [3] 0x51194 - 0x511ac (24) [4] 0x5124c - 0x51278 (44) [5] 0x51174 - 0x51194 (32) [6] 0x511b0 - 0x511d8 (40) keep_blocks[8]: [0] 0x510b0 [1] 0x511d8 [2] 0x5114c [3] 0x51194 [4] 0x5124c [5] 0x51174 [6] 0x511b0 [7] 0x51278 flow[8]: [0] 0x51020 - 0x51048 (40) -> ['0x510b0'] [1] 0x510b0 - 0x510e4 (52) -> ['0x5114c'] [2] 0x511d8 - 0x5124c (116) -> ['0x5124c'] [3] 0x5114c - 0x51174 (40) -> ['0x51194', '0x51174'] [4] 0x51194 - 0x511ac (24) -> ['0x511b0'] [5] 0x5124c - 0x51278 (44) -> ['0x510b0'] [6] 0x51174 - 0x51194 (32) -> ['0x511b0'] [7] 0x511b0 - 0x511d8 (40) -> ['0x511d8', '0x51278'] Patch 0x51048 => 0x510b0 Patch 0x510e4 => 0x5114c Patch 0x51248 => 0x5124c Patch 0x51168 => 0x51194, 0x51174 Patch 0x511ac => 0x511b0 Patch 0x51274 => 0x510b0 Patch 0x51190 => 0x511b0 Patch 0x511cc => 0x511d8, 0x51278 ☆ 用GAMBA对付SUB 用IDA64分析some_cff_new.so -------------------------------------------------------------------------- __int64 __fastcall foo(__int64 a1, __int64 a2, char a3) { int i; for ( i = 0; i < (unsigned __int64)strlen(a1); ++i ) *(_BYTE *)(a2 + i) = (~*(_BYTE *)(a1 + i) & 0xC2 | *(_BYTE *)(a1 + i) & 0x3D) ^ (~a3 & 0xC2 | a3 & 0x3D); return 123LL; } -------------------------------------------------------------------------- __int64 __fastcall bar(__int64 a1, __int64 a2, char a3) { int i; for ( i = 0; i < (unsigned __int64)strlen(a1); ++i ) *(_BYTE *)(a2 + i) = (~*(_BYTE *)(a1 + i) & 0xB3 | *(_BYTE *)(a1 + i) 
& 0x4C) ^ (~a3 & 0xB3 | a3 & 0x4C); return 610LL; } -------------------------------------------------------------------------- __int64 __fastcall baz(__int64 a1, __int64 a2, unsigned int a3, unsigned int a4, unsigned int a5, __int64 a6) { if ( a6 < 0 ) return 0LL; if ( ~(~(a2 + 4095) | 0xFFF) < a2 ) return 0LL; return _mmap(a1, a2, a3, a4, a5, a6); } -------------------------------------------------------------------------- some_cff_new.so中已经没有CFF了,但有"指令替换",涉及MBA。 从IDA 8.3起,有内置插件gooMBA,某些情况下可化简MBA。官方blog与众多MBA化简 工具比较后,把gooMBA吹得天花乱坠的,但我用some_cff_new.so实测,未能化简MBA, 失望。相比之下,D810、GAMBA均成功化简some_cff_new.so中的MBA。 $ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 8 "(~x & 0xc2 | x & 0x3d) ^ (~a3 & 0xc2 | a3 & 0x3d)" a3^x $ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 8 "(~x & 0xb3 | x & 0x4c) ^ (~a3 & 0xb3 | a3 & 0x4c)" a3^x $ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 64 "~(~(a2 + 4095) | 0xfff)" -4096&4095+a2 +的优先级高于& 作为对比,some_normal_small.so中这几个函数如下 -------------------------------------------------------------------------- __int64 __fastcall foo(__int64 a1, __int64 a2, char a3) { int i; for ( i = 0; i < (unsigned __int64)strlen(a1); ++i ) *(_BYTE *)(a2 + i) = *(_BYTE *)(a1 + i) ^ a3; return 123LL; } -------------------------------------------------------------------------- __int64 __fastcall bar(__int64 a1, __int64 a2, char a3) { int i; for ( i = 0; i < (unsigned __int64)strlen(a1); ++i ) *(_BYTE *)(a2 + i) = *(_BYTE *)(a1 + i) ^ a3; return 610LL; } -------------------------------------------------------------------------- __int64 __fastcall baz(__int64 a1, signed __int64 a2, unsigned int a3, unsigned int a4, unsigned int a5, __int64 a6) { if ( a6 < 0 ) return 0LL; if ( (__int64)((a2 + 4095) & 0xFFFFFFFFFFFFF000LL) >= a2 ) return _mmap(a1, a2, a3, a4, a5, a6); return 0LL; } --------------------------------------------------------------------------