标题: Unix系列(19)--从ELF抠代码 创建: 2025-09-13 09:30 修改: 2025-11-13 15:29 链接: https://scz.617.cn/unix/202509130930.txt -------------------------------------------------------------------------- 目录: ☆ 原始需求 ☆ 交叉编译环境 1) gcc ☆ 自制测试用例 1) hello.c ☆ 手工构造ELF 1) hello_arm64.json 3) skeleton.c 4) smallelf_arm64_2.py ☆ 参考资源 -------------------------------------------------------------------------- ☆ 原始需求 有个ARM64的ELF,并不打算真地执行它,只是Angr符号执行时需要处理其中某个目标 函数,额外涉及strlen()这种库函数。这个ELF很大,想用某种手段切掉大量无关函 数,只保留目标函数及必要的ELF信息,将切割剩下的ELF留作测试样本。这个需求有 什么好的满足方式?不想留一个24MB的so做测试样本。 计划在x64中交叉编译ARM64版hello.c,其中含有若干目标函数,作为后续研究对象。 摸索出各种解决方案后,再实测初始样本。 ☆ 交叉编译环境 1) gcc 参[1],下载交叉编译工具链。 mkdir /home/scz/src/aarch64-none-linux-gnu cd /home/scz/src/aarch64-none-linux-gnu tar xfJ /tmp/arm-gnu-toolchain-14.3.rel1-x86_64-aarch64-none-linux-gnu.tar.xz --strip-components=1 "--strip-components=N"必须放在尾部,解包时将路径的前N层目录去掉。 ☆ 自制测试用例 1) hello.c -------------------------------------------------------------------------- #if 0 export PATH=/home/scz/src/aarch64-none-linux-gnu/bin:$PATH aarch64-none-linux-gnu-gcc -Wall -pipe -O3 -s -o hello_arm64 hello.c #endif #include <stdio.h> #include <stdlib.h> #include <string.h> __attribute__((optimize("O0"), noinline, used)) static unsigned int bar ( char *in, char *out, char key ) { int i; for ( i = 0; i < strlen( in ); i++ ) { out[i] = in[i] ^ key; } return 0x5120LL; } __attribute__((optimize("O0"), noinline, used)) static unsigned int baz ( char *in, char *out, char key ) { int i; for ( i = 0; i < strlen( in ); i++ ) { out[i] = in[i] ^ key; } return 0x1314LL; } __attribute__((optimize("O0"), noinline, used)) static unsigned int foo ( unsigned int n ) { unsigned int mod = n % 4; unsigned int ret = 0; if ( mod == 0 ) { ret = ( n | 0xbaaad0bf ) * ( 2 ^ n ); } else if ( mod == 1 ) { ret = ( n & 0xbaaad0bf ) * ( 3 + n ); } else if ( mod == 2 ) { ret = ( n ^ 0xbaaad0bf ) * ( 4 | n ); } else { ret = ( n + 0xbaaad0bf ) * ( 5 & n ); } return ret; } __attribute__((optimize("O0"))) int main ( int argc, char * argv[] ) { unsigned int n, 
key; if ( argc < 2 ) { fprintf( stderr, "Usage: %s <n>\n", argv[0] ); return -1; } n = (unsigned int)strtoul( argv[1], NULL, 0 ); key = foo( n ); fprintf( stdout, "n=%#x key=%#x\n", n, key ); n = bar( argv[0], argv[0], key ); fprintf( stdout, "n=%#x\n", n ); n = baz( argv[0], argv[0], key ); fprintf( stdout, "n=%#x\n", n ); return 0; } -------------------------------------------------------------------------- hello.c用了些技巧,确保foo()、bar()、baz()函数体保留在ELF中。初版main()并 未调用bar()、baz(),必须使用attribute达成目的。终版main()实际调用了它们, attribute显得冗余,出于演示目的,保留之。 ☆ 手工构造ELF 可用某些Python模块,手工构造ELF。抠取foo、bar、baz函数体,置于.text。为便 于IDA分析,将foo、bar、baz加入.symtab、.dynsym节。这些函数体中可能含有bl指 令,调用其他函数。需修改bl目标,全部跳转到stub函数,这是只含ret指令的空函 数。假设多条bl指令的目标相同,修改后跳转到同一stub函数。不同bl目标对应不同 stub函数,将来给不同的stub函数赋不同的名字,便于IDA分析。大致布局如下 foo + pad + bar + pad + baz + pad + stub + pad + stub + pad 手工构造所得ELF并不打算真地执行,仅用于IDA静态分析、Angr模拟执行。 1) hello_arm64.json -------------------------------------------------------------------------- { "foo": { "off": "0x8a0", "size": "0xd8" }, "bar": { "off": "0x7a0", "size": "0x7c", "bl": [ { "off": "0x800", "size": 4, "target": "strlen" } ] }, "baz": { "off": "0x820", "size": "0x7c", "bl": [ { "off": "0x880", "size": 4, "target": "strlen" } ] } } -------------------------------------------------------------------------- 下面所说的偏移,全部是相对于文件首字节的偏移。 foo在偏移0x8a0处,函数体0xd8字节,不含bl指令。 bar在偏移0x7a0处,函数体0x7c字节。偏移0x800处有4字节"bl strlen"。 baz在偏移0x820处,函数体0x7c字节。偏移0x880处有4字节"bl strlen"。 这个json提供的信息,可用反汇编引擎自动获取,尤其bl指令的偏移。 3) skeleton.c -------------------------------------------------------------------------- #if 0 aarch64-none-linux-gnu-gcc -Wall -pipe -O0 -fPIC -shared -s -o libskeleton.so skeleton.c #endif __attribute__((optimize("O0"), noinline, used)) unsigned int dummy ( void ) { return 0x51201314LL; } -------------------------------------------------------------------------- 由skeleton.c正常编译得到libskeleton.so,在其基础上添加内容。 4) smallelf_arm64_2.py -------------------------------------------------------------------------- 
#!/usr/bin/env python # -*- encoding: utf-8 -*- # # python3 smallelf_arm64_2.py # import sys, json import collections import lief def to_int ( x ) : if isinstance( x, int ) : return x if isinstance( x, str ) : return int( x, 0 ) def loadjson ( filename ) : with open( filename, "r" ) as f : data = json.load( f ) for func, meta in data.items() : meta["off"] = to_int( meta["off"] ) meta["size"] = to_int( meta["size"] ) if "bl" in meta : for bl in meta["bl"] : bl["off"] = to_int( bl["off"] ) bl["size"] = to_int( bl["size"] ) return data def getbuf ( filename, off, size, pad=True ) : with open( filename, "rb" ) as f : f.seek( off ) buf = bytearray( f.read( size ) ) if pad : padsize = ( 16 - ( len( buf ) % 16 ) ) % 16 if padsize : buf.extend( b"\x00" * padsize ) return buf def prepare_info ( filename, info ) : STUB = bytearray( b'\xc0\x03\x5f\xd6' + b'\0' * 12 ) out = collections.OrderedDict() internal_targets \ = [] external_targets \ = [] for meta in info.values() : for bl in meta.get( "bl", [] ) : t = bl["target"] if t in info : if t not in internal_targets : internal_targets.append( t ) else : if t not in external_targets : external_targets.append( t ) func_base = 0 func_bases = {} for fname, meta in info.items() : off = meta["off"] size = meta["size"] buf = getbuf( filename, off, size, True ) out[fname] \ = ( buf, size, 1 ) func_bases[fname] \ = func_base func_base \ += len( buf ) stub_base = func_base stub_bases = {} for t in external_targets : stub_bases[t] = stub_base stub_base += len( STUB ) for fname, meta in info.items() : buf, _, _ = out[fname] func_base = func_bases[fname] func_off = meta["off"] for bl in meta.get( "bl", [] ) : off = bl["off"] - func_off t = bl["target"] saddr = func_base + off if t in func_bases : daddr = func_bases[t] else : daddr = stub_bases[t] imm = ( daddr - saddr ) >> 2 imm26 = imm & 0x3ffffff insn = 0x94000000 | imm26 buf[off:off+4] \ = insn.to_bytes( 4, "little" ) for t in external_targets : out[t] = ( STUB, 4, 0 ) return out def 
build_elf ( filename, funcs ) : binary = lief.ELF.parse( 'libskeleton.so' ) extcode_sec = lief.ELF.Section( ".extcode", lief.ELF.Section.TYPE.PROGBITS ) extcode_sec.add( lief.ELF.Section.FLAGS.ALLOC | lief.ELF.Section.FLAGS.EXECINSTR ) extcode_sec.alignment \ = 16 extcode_sec.content \ = list( b''.join( buf for buf, _, _ in funcs.values() ) ) binary.add( extcode_sec ) sym = lief.ELF.Symbol() sym.name = '' sym.value = 0 sym.type = lief.ELF.Symbol.TYPE.NOTYPE sym.binding = lief.ELF.Symbol.BINDING.LOCAL sym.visibility \ = lief.ELF.Symbol.VISIBILITY.DEFAULT sym.shndx = 0 binary.add_symtab_symbol( sym ) extcode_sec = binary.get_section( ".extcode" ) off = 0 for name, ( buf, size, type ) in funcs.items() : sym = lief.ELF.Symbol() sym.name = name sym.value = extcode_sec.virtual_address + off sym.size = size sym.type = lief.ELF.Symbol.TYPE.FUNC if type : sym.binding = lief.ELF.Symbol.BINDING.GLOBAL sym.visibility \ = lief.ELF.Symbol.VISIBILITY.DEFAULT else : sym.binding = lief.ELF.Symbol.BINDING.LOCAL sym.visibility \ = lief.ELF.Symbol.VISIBILITY.HIDDEN sym.shndx = binary.get_section_idx( ".extcode" ) if type : binary.add_dynamic_symbol( sym ) else : binary.add_symtab_symbol( sym ) off += len( buf ) builder = lief.ELF.Builder( binary ) builder.build() builder.write( filename ) def main ( argv ) : ret = False while True : if len( argv ) != 4 : print( f"Usage: python3 {argv[0]} " ) break oldelf = argv[1] jsonf = argv[2] newelf = argv[3] info = loadjson( jsonf ) funcs = prepare_info( oldelf, info ) build_elf( newelf, funcs ) ret = True break return ret if "__main__" == __name__ : ret = main( sys.argv ) sys.exit( 0 if ret else -1 ) -------------------------------------------------------------------------- python3 smallelf_arm64_2.py hello_arm64 hello_arm64.json hello_arm64_small_1 aarch64-none-linux-gnu-readelf -Wa hello_arm64_small_1 ☆ 参考资源 [1] Arm GNU Toolchain Downloads https://developer.arm.com/downloads/-/arm-gnu-toolchain-downloads x86_64 Linux hosted cross toolchains: 
AArch64 GNU/Linux target (aarch64-none-linux-gnu) https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/binrel/arm-gnu-toolchain-14.3.rel1-x86_64-aarch64-none-linux-gnu.tar.xz Windows (mingw-w64-x86_64) hosted cross toolchains: AArch64 GNU/Linux target (aarch64-none-linux-gnu) https://developer.arm.com/-/media/Files/downloads/gnu/14.3.rel1/binrel/arm-gnu-toolchain-14.3.rel1-mingw-w64-x86_64-aarch64-none-linux-gnu.zip [2] https://github.com/doronz88/simpleelf [3] LIEF - Library to Instrument Executable Formats https://github.com/lief-project/LIEF https://lief.re/doc/latest/index.html https://lief.re/doc/latest/api/binary_abstraction/index.html https://lief.re/doc/latest/api/binary_abstraction/python.html https://lief.re/doc/latest/formats/elf/index.html https://lief.re/doc/latest/formats/elf/python.html New ELF Builder - Romain Thomas [2022-01-23] https://lief.re/blog/2022-01-23-new-elf-builder/ lief-patchelf https://lief.re/blog/2025-07-13-patchelf/ https://lief.re/doc/latest/tools/lief-patchelf/index.html https://github.com/lief-project/LIEF/tree/main/tools/lief-patchelf [4] A tool to edit .dynsym symbols in ELF files - [2022-09-27] https://softwarerecs.stackexchange.com/questions/84123/a-tool-to-edit-dynsym-symbols-in-elf-files 使用LIEF库为ELF文件添加新Section - headedit [2025-09-22] https://www.52pojie.cn/thread-2061926-1-1.html