标题: Angr符号执行练习--对付ARM AARCH64 CFF

创建: 2025-09-29 10:45
更新: 2025-09-30 10:00
链接: https://scz.617.cn/unix/202509291045.txt

--------------------------------------------------------------------------

目录:

    ☆ 背景介绍
    ☆ some_cff_patch.py
    ☆ 用GAMBA对付SUB

--------------------------------------------------------------------------

☆ 背景介绍

参看

OLLVM CFF去平坦化原理 - lyl610abc [2025-8-27]
https://www.52pojie.cn/thread-2056288-1-1.html

上文提供了ARM AARCH64平台的CFF测试用例libmmap_exec.so，作者给了混淆前后的
so，据此可检查反CFF结果是否符合预期。我预处理过样本，得到更小的测试用例，
foo()、bar()、baz()均有CFF。为什么这么干呢？因为Angr脚本处理原始so非常耗时，
具体是proj.analyses.CFG()这一步非常耗时，还会触发许多警告。裁剪后的so，可
让我们聚焦被混淆过的目标函数，快速完成符号执行。

IDA的D810插件可对付some_cff_small.so，既对付CFF(控制流平坦化)也对付SUB(指令
替换)。本文演示Angr符号执行反AARCH64 CFF，SUB则用GAMBA手工化简之。

完整测试用例打包

https://scz.617.cn/unix/202509291045.txt
https://scz.617.cn/unix/202509291045.7z

☆ some_cff_patch.py

some_cff_patch.py实际源自

https://github.com/cq674350529/deflat/blob/master/flat_control_flow/deflat.py

am_graph模块实际源自

https://github.com/angr/angr-management/blob/master/angrmanagement/utils/graph.py

--------------------------------------------------------------------------
import sys, struct, collections
import angr, claripy, pyvex
import am_graph

def get_func_from_addr(proj, addr):
    """Return the knowledge-base function associated with ``addr``.

    An exact lookup via ``proj.kb.functions.get_by_addr`` is tried first.
    When that raises ``KeyError`` the result of
    ``proj.kb.functions.floor_func`` is returned instead.
    """
    try:
        found = proj.kb.functions.get_by_addr(addr)
    except KeyError:
        found = proj.kb.functions.floor_func(addr)
    return found

def get_insns_of_node(proj, node):
    """Disassemble the bytes covered by ``node`` and return them as a list.

    ``node.size`` bytes are read from the loader's memory at ``node.addr``
    and handed to the architecture's capstone disassembler, using
    ``node.addr`` as the address of the first instruction.
    """
    raw = proj.loader.memory.load(node.addr, node.size)
    decoded = proj.arch.capstone.disasm(raw, node.addr)
    return list(decoded)

def get_cond_jmp(proj, node):
    """Return the last instruction of ``node`` if it is a conditional branch.

    The mnemonic is compared case-insensitively. Any ``B.<cond>`` form and
    the CBZ / CBNZ / TBZ / TBNZ instructions count as conditional branches.
    ``None`` is returned for every other terminating instruction.
    """
    compare_and_branch = ('CBZ', 'CBNZ', 'TBZ', 'TBNZ')
    last = get_insns_of_node(proj, node)[-1]
    name = last.mnemonic.upper()
    # A mnemonic that starts with 'B.' can never equal plain 'B' or 'BL',
    # so the prefix test alone identifies the B.<cond> family.
    if name.startswith('B.') or name in compare_and_branch:
        return last
    return None

def get_some_nodes_0(supergraph, threshold):
    """Locate the structural nodes of a flattened function in ``supergraph``.

    Classification rules, applied to every node:

    * in-degree 0                                  -> prologue node
    * out-degree 0 and no ``out_branches``         -> return node
    * out-degree 1, exactly one ``out_branches``
      entry and in-degree >= ``threshold``         -> pre-dispatcher node;
      its first successor is recorded as a dispatcher node

    Exactly one prologue and one return node are expected, and at least one
    pre-dispatcher; these expectations are enforced with ``assert``.

    Returns ``(prologue_node, retn_node, pre_dispatcher_nodes,
    dispatcher_nodes)`` and prints a summary of what was found.
    """

    def show(title, members):
        # Shared printer for the two node lists.
        print(f'{title}[{len(members)}]:')
        for idx, member in enumerate(members):
            print(f'[{idx}] {member.addr:#x} ({member.size})')

    entry = None
    leave = None
    pre_dispatchers = []
    dispatchers = []

    for candidate in supergraph.nodes():
        fan_in = supergraph.in_degree(candidate)
        fan_out = supergraph.out_degree(candidate)

        if fan_in == 0:
            assert entry is None
            entry = candidate

        if fan_out == 0 and len(candidate.out_branches) == 0:
            assert leave is None
            leave = candidate

        if fan_out == 1 and len(candidate.out_branches) == 1 and fan_in >= threshold:
            pre_dispatchers.append(candidate)
            dispatchers.append(list(supergraph.successors(candidate))[0])

    assert entry is not None
    assert leave is not None
    assert pre_dispatchers
    assert dispatchers

    print(f'prologue_node: {entry.addr:#x}')
    print(f'retn_node: {leave.addr:#x}')
    show('pre_dispatcher_nodes', pre_dispatchers)
    show('dispatcher_nodes', dispatchers)

    return entry, leave, pre_dispatchers, dispatchers

def get_some_nodes_1(proj, supergraph, pre_dispatcher_nodes, prologue_node, retn_node):
    """Split the remaining super-nodes into "relevant" blocks and NOP candidates.

    The prologue, the return node and every pre-dispatcher are skipped, as
    are nodes without predecessors. Of the rest, a node is *relevant* when
    at least one predecessor ends in a ``B.EQ`` whose branch target is the
    node's address. Every other node goes into ``nop_nodes``.

    Each node is recorded at most once, even if several ``B.EQ``
    predecessors point at it. A duplicate entry would make the caller
    symbolically execute the block twice and record duplicate children.

    Returns ``(relevant_nodes, nop_nodes)`` and prints both lists.
    Asserts that at least one relevant node was found.
    """
    skipped_addrs = {node.addr for node in pre_dispatcher_nodes}
    skipped_addrs.add(prologue_node.addr)
    skipped_addrs.add(retn_node.addr)

    relevant_nodes = []
    nop_nodes = []
    for node in supergraph.nodes():
        if node.addr in skipped_addrs:
            continue
        predecessors = list(supergraph.predecessors(node))
        if not predecessors:
            continue

        for predecessor in predecessors:
            insn = get_cond_jmp(proj, predecessor)
            if insn is None or insn.mnemonic.upper() != 'B.EQ':
                continue
            # The operand is the immediate target, e.g. "#0x510b0".
            target = int(insn.op_str.strip().lstrip('#'), 16)
            # Sanity check: the decoded target must be one of the graph
            # successors. Comparing against the target itself avoids relying
            # on the order in which the graph lists successors.
            successor_addrs = {succ.addr for succ in supergraph.successors(predecessor)}
            assert target in successor_addrs
            if node.addr == target:
                relevant_nodes.append(node)
                break
        else:
            # No B.EQ predecessor jumps here.
            nop_nodes.append(node)

    assert relevant_nodes

    print(f'nop_nodes[{len(nop_nodes)}]:')
    for i, node in enumerate(nop_nodes):
        print(f'[{i}] {node.addr:#x} - {node.addr+node.size:#x} ({node.size})')
    print(f'relevant_nodes[{len(relevant_nodes)}]:')
    for i, node in enumerate(relevant_nodes):
        print(f'[{i}] {node.addr:#x} - {node.addr+node.size:#x} ({node.size})')

    return relevant_nodes, nop_nodes

def symbolic_execution ( proj, keep_blocks, start_addr, hook_addrs, set_value=None ) :
    """Symbolically execute from ``start_addr`` until a block in ``keep_blocks`` is reached.

    proj        -- angr project to execute in
    keep_blocks -- container of block addresses that terminate the search
    start_addr  -- address the blank initial state starts at
    hook_addrs  -- addresses to hook with a no-op procedure (may be empty/None)
    set_value   -- if not None, value forced into the condition temp of the
                   first VEX ITE expression encountered

    Returns the address of the first active state found in ``keep_blocks``
    after at least one step, or None once no active states remain.
    NOTE(review): there is no step limit; the loop only ends on a hit or
    when ``sm.active`` becomes empty.
    """

    def retn_procedure ( state ) :
        # Hook body: does nothing except remove the hook at the current address.
        proj.unhook( state.addr )
        return

    def statement_inspect ( state ) :
        # Expressions of the VEX statement that is about to execute.
        expressions = list( state.scratch.irsb.statements[state.inspect.statement].expressions )
        if len( expressions ) != 0 and isinstance( expressions[0], pyvex.expr.ITE ) :
            # Overwrite the temp holding the ITE condition so the selection
            # takes the side chosen by the caller ...
            state.scratch.temps[expressions[0].cond.tmp]    = set_value
            # ... then drop all 'statement' breakpoints so only the first ITE is forced.
            state.inspect._breakpoints['statement']         = []

    if hook_addrs :
        for addr in hook_addrs :
            # NOTE(review): length=4 presumably makes execution resume after
            # the hooked 4-byte instruction -- confirm against angr's Project.hook docs.
            proj.hook( addr, retn_procedure, length=4 )

    init_state  = proj.factory.blank_state(
        addr            = start_addr,
        add_options     = {
            angr.options.SYMBOL_FILL_UNCONSTRAINED_MEMORY,
            angr.options.SYMBOL_FILL_UNCONSTRAINED_REGISTERS,
            angr.options.BYPASS_UNSUPPORTED_SYSCALL,
        },
        remove_options  = {
            angr.options.LAZY_SOLVES,
        }
    )
    if set_value is not None :
        init_state.inspect.b( 'statement', when=angr.BP_BEFORE, action=statement_inspect )

    sm          = proj.factory.simulation_manager( init_state )
    # Step once before checking, so the start block itself (which may be in
    # keep_blocks) is not reported as the destination.
    sm.step()
    while len( sm.active ) > 0 :
        for state in sm.active :
            if state.addr in keep_blocks :
                return state.addr
        sm.step()

    return None

def get_flow(proj, prologue_node, relevant_nodes, retn_node):
    """Recover the real successors of the prologue and of every relevant block.

    Each start node is symbolically executed until it reaches another
    relevant block or the return node. A block containing a ``csel`` is run
    twice, with the select condition forced to 1 and then to 0. Any other
    block is run once without forcing. ``bl`` / ``blr`` instructions in the
    block are passed to ``symbolic_execution`` as hook addresses.

    Returns ``(flow, ins_dict)``:

    * ``flow``     -- defaultdict mapping node -> list of successor addresses
                      (for ``csel`` blocks the condition-true run comes first)
    * ``ins_dict`` -- node -> the first ``csel`` instruction found in it
    """
    start_nodes = [prologue_node, *relevant_nodes]

    stop_addrs = [node.addr for node in relevant_nodes] + [retn_node.addr]
    print(f'keep_blocks[{len(stop_addrs)}]:')
    for i, addr in enumerate(stop_addrs):
        print(f'[{i}] {addr:#x}')
    keep_blocks = set(stop_addrs)

    flow = collections.defaultdict(list)
    ins_dict = {}
    for node in start_nodes:
        block = proj.factory.block(node.addr, size=node.size)
        has_branch = False
        hook_addrs = set()
        for ins in block.capstone.insns:
            if ins.mnemonic.startswith('csel'):
                # Only the first csel of a node is remembered.
                if node not in ins_dict:
                    ins_dict[node] = ins
                    has_branch = True
            elif ins.mnemonic in ('bl', 'blr'):
                hook_addrs.add(ins.address)

        # Condition values to force: both outcomes for a csel block,
        # otherwise a single unforced run.
        if has_branch:
            forced_values = (claripy.BVV(1, 1), claripy.BVV(0, 1))
        else:
            forced_values = (None,)

        for forced in forced_values:
            next_addr = symbolic_execution(proj, keep_blocks, node.addr, hook_addrs, forced)
            if next_addr is not None:
                flow[node].append(next_addr)

    print(f'flow[{len(flow)}]:')
    for i, (k, v) in enumerate(flow.items()):
        print(f'[{i}] {k.addr:#x} - {k.addr + k.size:#x} ({k.size}) ->', [hex(child) for child in v])

    return flow, ins_dict

# Condition mnemonics listed in encoding order: the position of each name is
# the condition value that get_j_ins ORs into the low bits of a B.<cond> word.
OPCODES = {
    cond: code
    for code, cond in enumerate(
        (
            'eq', 'ne', 'hs', 'lo', 'mi', 'pl', 'vs',
            'vc', 'hi', 'ls', 'ge', 'lt', 'gt', 'le',
        )
    )
}
# Not a condition code: the raw 4-byte NOP pattern written by fill_nop.
OPCODES['nop'] = b'\x1f\x20\x03\xd5'

def fill_nop(proj, buf, addr, size):
    """Overwrite ``size`` bytes of ``buf`` with NOP instructions.

    ``addr`` is a virtual address; it is translated to a file offset with
    the main object's ``addr_to_offset``. The NOP pattern is byte-reversed
    when the architecture reports big-endian memory (``"Iend_BE"``).
    ``buf`` is modified in place, four bytes per step.
    """
    pattern = OPCODES['nop']
    if proj.arch.memory_endness == "Iend_BE":
        pattern = pattern[::-1]
    start = proj.loader.main_object.addr_to_offset(addr)
    for word_off in range(0, size, 4):
        base = start + word_off
        for k in range(4):
            buf[base + k] = pattern[k]

def get_j_ins(f_addr, t_addr, j_type):
    """Encode an AArch64 branch located at ``f_addr`` that jumps to ``t_addr``.

    f_addr -- address of the branch instruction itself
    t_addr -- branch destination
    j_type -- ``'b'`` for an unconditional ``B`` (26-bit word offset);
              anything else is lower-cased and looked up in ``OPCODES`` as
              the condition of a ``B.<cond>`` (19-bit word offset)

    Returns the 4-byte instruction packed little-endian.

    Raises ValueError when the displacement is not a multiple of 4 or does
    not fit the signed immediate field. Without this check the result would
    be a silently wrong instruction: the offset would wrap, or spill into
    the opcode bits.
    Raises KeyError for an unknown condition mnemonic.
    """
    delta = t_addr - f_addr
    if delta % 4:
        raise ValueError(
            'branch displacement %#x -> %#x is not 4-byte aligned' % (f_addr, t_addr)
        )
    words = delta // 4

    if 'b' == j_type:
        # B: opcode 0b000101 in bits [31:26], imm26 in bits [25:0].
        base, imm_bits, shift = 0x14000000, 26, 0
    else:
        # B.cond: 0x54 in bits [31:24], imm19 in bits [23:5], cond in bits [3:0].
        base, imm_bits, shift = 0x54000000 | OPCODES[j_type.lower()], 19, 5

    limit = 1 << (imm_bits - 1)
    if not -limit <= words < limit:
        raise ValueError(
            'branch displacement %#x -> %#x does not fit in imm%d' % (f_addr, t_addr, imm_bits)
        )

    # Two's-complement truncation to the immediate width handles backward jumps.
    imm = words & ((1 << imm_bits) - 1)
    return struct.pack('<I', base | (imm << shift))

def patch_ins(proj, buf, addr, ins):
    """Write the encoded instruction bytes ``ins`` into ``buf`` in place.

    ``addr`` is a virtual address, converted to a file offset through the
    main object's ``addr_to_offset``. ``len(ins)`` bytes starting at that
    offset are replaced.
    """
    begin = proj.loader.main_object.addr_to_offset(addr)
    end = begin + len(ins)
    buf[begin:end] = ins

def patch_buf(proj, buf, nop_nodes, flow, ins_dict):
    """Apply the recovered control flow to the file image ``buf`` (in place).

    1. Every node in ``nop_nodes`` is filled with NOPs.
    2. For a parent with exactly one child an unconditional ``b`` to that
       child is written. It overwrites the parent's last instruction when
       that instruction is already a ``B``; otherwise it goes into the slot
       right after it, so the parent's own code stays intact.
    3. For any other parent, the ``csel`` recorded in ``ins_dict`` is
       replaced by ``b.<cond>`` to the first child (``<cond>`` being the
       last operand of the ``csel``), and the following slot gets a ``b``
       to the second child.
    """

    def encode(src, dst, kind):
        # Build the branch word and byte-swap it for big-endian memory.
        word = get_j_ins(src, dst, kind)
        if proj.arch.memory_endness == "Iend_BE":
            word = word[::-1]
        return word

    for node in nop_nodes:
        fill_nop(proj, buf, node.addr, node.size)

    for parent, children in flow.items():
        if len(children) == 1:
            last = get_insns_of_node(proj, parent)[-1]
            site = last.address
            if last.mnemonic.upper() != 'B':
                site += 4
            jump = encode(site, children[0], 'b')
            print(f'Patch {site:#x} => {children[0]:#x}')
            patch_ins(proj, buf, site, jump)
            continue

        csel = ins_dict[parent]
        jump = encode(csel.address, children[0], csel.op_str.split(',')[-1].strip())
        print(f'Patch {csel.address:#x} => {children[0]:#x}, {children[1]:#x}')
        patch_ins(proj, buf, csel.address, jump)

        fallthrough_site = csel.address + 4
        jump = encode(fallthrough_site, children[1], 'b')
        patch_ins(proj, buf, fallthrough_site, jump)

def dosth(proj, buf, addr):
    """Run the whole de-flattening pipeline for the function at ``addr``.

    Steps: build the super-graph of the function, classify its nodes,
    recover the real flow by symbolic execution, then patch ``buf`` in
    place. A blank line is printed after each stage's output. The
    pre-dispatcher in-degree threshold is fixed at 4.
    """
    print(f'func {addr:#x}')

    target_func = get_func_from_addr(proj, addr)
    supergraph = am_graph.to_supergraph(target_func.transition_graph)

    # The dispatcher list is reported by get_some_nodes_0 but not needed here.
    prologue_node, retn_node, pre_dispatcher_nodes, _ = get_some_nodes_0(supergraph, 4)
    print("")

    relevant_nodes, nop_nodes = get_some_nodes_1(
        proj, supergraph, pre_dispatcher_nodes, prologue_node, retn_node
    )
    print("")

    flow, ins_dict = get_flow(proj, prologue_node, relevant_nodes, retn_node)
    print("")

    patch_buf(proj, buf, nop_nodes, flow, ins_dict)
    print("")

def main(argv):
    """Entry point: de-flatten the hard-coded functions of a shared object.

    argv[1] -- path of the input binary (loaded by angr and read as raw bytes)
    argv[2] -- path the patched copy is written to

    Raises SystemExit with a usage message when either path is missing.
    Checking up front matters: the output path is only used at the very
    end, after CFG recovery and symbolic execution have already run.
    """
    if len(argv) < 3:
        prog = argv[0] if argv else 'some_cff_patch.py'
        raise SystemExit(f'Usage: {prog} <input.so> <output.so>')

    in_path, out_path = argv[1], argv[2]

    base_addr = 0
    proj = angr.Project(
        in_path,
        load_options={
            'auto_load_libs': False,
            'main_opts': {
                'base_addr': base_addr,
            },
        },
    )
    # The CFG object itself is not used afterwards.
    # NOTE(review): presumably run for its side effect of populating
    # proj.kb.functions, which get_func_from_addr reads -- confirm.
    proj.analyses.CFG(
        force_smart_scan=False,
        force_complete_scan=True,
        normalize=True,
        resolve_indirect_jumps=True,
        fail_fast=True,
    )

    with open(in_path, 'rb') as f:
        buf = bytearray(f.read())
    origsize = len(buf)

    # Function entry addresses specific to the sample binary.
    addrlist = (0x51020, 0x51290, 0x514f0)
    for addr in addrlist:
        dosth(proj, buf, addr)

    # Patching must never grow or shrink the file image.
    assert len(buf) == origsize
    with open(out_path, 'wb') as f:
        f.write(buf)


if "__main__" == __name__:
    main(sys.argv)
--------------------------------------------------------------------------

暂不清楚some_cff_small.so使用何种CFF工具生成，不能简单套用标准OLLVM CFF的
反混淆过程。针对此特例，可检查汇编指令，B.EQ指令的跳转目标即"有效块"，另一
分支可舍弃。符号执行与以前的套路相同，x86需要Hook call，ARM需要Hook bl或
blr。恢复控制流与以前的套路相同，x86关注cmov，ARM关注csel。Patch时，对
len(children)为1的情形，检查parent最后一条指令，若非b指令，需Patch下一条指
令，避免破坏原有效代码，这是针对此特例的工程实践踩过的坑。

将来碰上其他ARM AARCH64样本，可在some_cff_patch.py基础上修改，一是寻找识别
"有效块"的办法，二是注意Patch时勿破坏原有效代码。细节部分需要具体样本具体
分析，整体思路不会有大变化。

some_cff_patch.py含有历史遗迹代码，出于某些个人考虑，未精简。

$ python3 some_cff_patch.py some_cff_small.so some_cff_new.so

输出较多，只展示foo()的信息

func 0x51020
prologue_node: 0x51020
retn_node: 0x51278
pre_dispatcher_nodes[2]:
[0] 0x51288 (4)
[1] 0x511ac (4)
dispatcher_nodes[2]:    // 实际未使用
[0] 0x51048 (24)
[1] 0x510e4 (24)

nop_nodes[10]:
[0] 0x51048 - 0x51060 (24)
[1] 0x51060 - 0x51078 (24)
[2] 0x510e4 - 0x510fc (24)
[3] 0x51078 - 0x51090 (24)
[4] 0x510fc - 0x51114 (24)
[5] 0x51090 - 0x510a8 (24)
[6] 0x51114 - 0x5112c (24)
[7] 0x510a8 - 0x510b0 (8)
[8] 0x5112c - 0x51144 (24)
[9] 0x51144 - 0x5114c (8)
relevant_nodes[7]:
[0] 0x510b0 - 0x510e4 (52)
[1] 0x511d8 - 0x5124c (116)
[2] 0x5114c - 0x51174 (40)
[3] 0x51194 - 0x511ac (24)
[4] 0x5124c - 0x51278 (44)
[5] 0x51174 - 0x51194 (32)
[6] 0x511b0 - 0x511d8 (40)

keep_blocks[8]:
[0] 0x510b0
[1] 0x511d8
[2] 0x5114c
[3] 0x51194
[4] 0x5124c
[5] 0x51174
[6] 0x511b0
[7] 0x51278
flow[8]:
[0] 0x51020 - 0x51048 (40) -> ['0x510b0']
[1] 0x510b0 - 0x510e4 (52) -> ['0x5114c']
[2] 0x511d8 - 0x5124c (116) -> ['0x5124c']
[3] 0x5114c - 0x51174 (40) -> ['0x51194', '0x51174']
[4] 0x51194 - 0x511ac (24) -> ['0x511b0']
[5] 0x5124c - 0x51278 (44) -> ['0x510b0']
[6] 0x51174 - 0x51194 (32) -> ['0x511b0']
[7] 0x511b0 - 0x511d8 (40) -> ['0x511d8', '0x51278']

Patch 0x51048 => 0x510b0
Patch 0x510e4 => 0x5114c
Patch 0x51248 => 0x5124c
Patch 0x51168 => 0x51194, 0x51174
Patch 0x511ac => 0x511b0
Patch 0x51274 => 0x510b0
Patch 0x51190 => 0x511b0
Patch 0x511cc => 0x511d8, 0x51278

☆ 用GAMBA对付SUB

用IDA64分析some_cff_new.so

--------------------------------------------------------------------------
__int64 __fastcall foo(__int64 a1, __int64 a2, char a3)
{
    int i;

    for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
        *(_BYTE *)(a2 + i) = (~*(_BYTE *)(a1 + i) & 0xC2 | *(_BYTE *)(a1 + i) & 0x3D) ^ (~a3 & 0xC2 | a3 & 0x3D);
    return 123LL;
}
--------------------------------------------------------------------------
__int64 __fastcall bar(__int64 a1, __int64 a2, char a3)
{
    int i;

    for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
        *(_BYTE *)(a2 + i) = (~*(_BYTE *)(a1 + i) & 0xB3 | *(_BYTE *)(a1 + i) & 0x4C) ^ (~a3 & 0xB3 | a3 & 0x4C);
    return 610LL;
}
--------------------------------------------------------------------------
__int64 __fastcall baz(__int64 a1, __int64 a2, unsigned int a3, unsigned int a4, unsigned int a5, __int64 a6)
{
    if ( a6 < 0 )
        return 0LL;
    if ( ~(~(a2 + 4095) | 0xFFF) < a2 )
        return 0LL;
    return _mmap(a1, a2, a3, a4, a5, a6);
}
--------------------------------------------------------------------------

some_cff_new.so中已经没有CFF了，但有"指令替换"，涉及MBA。

从IDA 8.3起，有内置插件gooMBA，某些情况下可化简MBA。官方blog与众多MBA化简
工具比较后，把gooMBA吹得天花乱坠的，但我用some_cff_new.so实测，未能化简MBA，
失望。相比之下，D810、GAMBA均成功化简some_cff_new.so中的MBA。

$ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 8 "(~x & 0xc2 | x & 0x3d) ^ (~a3 & 0xc2 | a3 & 0x3d)"
a3^x

$ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 8 "(~x & 0xb3 | x & 0x4c) ^ (~a3 & 0xb3 | a3 & 0x4c)"
a3^x

$ python3 /home/scz/src/ollvm/GAMBA/src/simplify_general.py -b 64 "~(~(a2 + 4095) | 0xfff)"
-4096&4095+a2

+的优先级高于&，故上式应读作 -4096&(4095+a2)，即 (a2+4095)&0xFFFFFFFFFFFFF000，
与后面some_normal_small.so中baz()的表达式一致

作为对比，some_normal_small.so中这几个函数如下

--------------------------------------------------------------------------
__int64 __fastcall foo(__int64 a1, __int64 a2, char a3)
{
    int i;

    for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
        *(_BYTE *)(a2 + i) = *(_BYTE *)(a1 + i) ^ a3;
    return 123LL;
}
--------------------------------------------------------------------------
__int64 __fastcall bar(__int64 a1, __int64 a2, char a3)
{
    int i;

    for ( i = 0; i < (unsigned __int64)strlen(a1); ++i )
        *(_BYTE *)(a2 + i) = *(_BYTE *)(a1 + i) ^ a3;
    return 610LL;
}
--------------------------------------------------------------------------
__int64 __fastcall baz(__int64 a1, signed __int64 a2, unsigned int a3, unsigned int a4, unsigned int a5, __int64 a6)
{
    if ( a6 < 0 )
        return 0LL;
    if ( (__int64)((a2 + 4095) & 0xFFFFFFFFFFFFF000LL) >= a2 )
        return _mmap(a1, a2, a3, a4, a5, a6);
    return 0LL;
}
--------------------------------------------------------------------------