objdump -S -d elf-file
通常并不足够,因为它缺少.data
部分。
但似乎objdump -S -D elf-file
就足够了。
为了验证这一点,我写了一个小的x86-64汇编文件,它使用extern printf
,用YASM汇编(不带调试符号),再用GCC链接。
[bits 64]
; yasm -f elf64 -m amd64 1st_generation.asm -o 1st_generation.o; gcc -o 1st_generation 1st_generation.o
;
; First-generation test program (Intel syntax, YASM, linked through gcc so
; that libc's printf is available).  It prints "abcdef = <num>" and then
; terminates with a raw exit system call instead of returning from main.

section .text

global  main
extern  printf

;-----------------------------------------------------------------------
; int main(void)
; Calls printf(msg, num) and never returns to its caller.
;-----------------------------------------------------------------------
main:
        push    rbp                     ; frame setup; rbp is never restored because main never returns
        mov     rbp, rsp

        lea     rdi, [msg]              ; arg 1: format string
        mov     rsi, [num]              ; arg 2: value for %d
                                        ; NOTE(review): this is a 64-bit load from a 4-byte `dd`;
                                        ; the upper half comes from the bytes that follow num
        xor     eax, eax                ; variadic call: al = 0, no vector registers used
        call    printf

        mov     eax, 60                 ; system call number 60
        xor     ebx, ebx                ; NOTE(review): the x86-64 syscall convention passes the
                                        ; first argument in rdi, not ebx -- confirm intended status
        syscall                         ; NOTE(review): leaving via a raw syscall skips libc's
                                        ; stdio flush -- verify output when stdout is not a terminal

section .data

msg:            db      'abcdef = %d', 0xa, 0
num:            dd      1337
testmsg1:       db      "test 01", 0x0a, 0  ; not referenced by the code above
然后我用objdump -S -D -M intel elf-file >objdump_output.txt
对它进行反汇编。 -M intel
选项生成Intel语法的反汇编。AT&T语法也可以,但为了清晰起见,我更喜欢Intel语法。
然后我写了一个小的gawk
程序objdump_to_asm
,把objdump -S -D -M intel elf-file >objdump_output.txt
产生的反汇编输出转换成适合YASM的格式。它假定代码是x86-64,并以main
作为入口点。可以很容易地改写以适应其他环境(x86很简单,其他架构可能需要更多工作)。用法:./objdump_to_asm objdump_output.txt
。有趣的是,第一代可执行文件的大小为6598字节,而第二代可执行文件的大小仅为6496字节。第三代汇编代码与第二代汇编代码相同。
下面的代码:
#!/usr/bin/gawk -f
# objdump_to_asm -- convert the output of `objdump -S -D -M intel elf-file`
# into YASM source.
#
# Requires GNU awk: the script relies on gawk extensions such as gensub(),
# so it must not be run by a plain POSIX awk (mawk, busybox awk, ...).
#
# Usage: ./objdump_to_asm objdump_output.txt >2nd_generation.asm

# BEGIN: set up the configuration tables used by the main rule, then consume
# the two header lines of the objdump output and emit the [bits 64] directive.
BEGIN {
    # Text objdump prints in front of the name of every section.
    disassembly_of_section_string = "Disassembly of section ";

    # Sections that are recognised only so that their contents are skipped.
    sections_to_discard[1] = ".interp";
    sections_to_discard[2] = ".note.ABI-tag";
    sections_to_discard[3] = ".note.gnu.build-id";
    sections_to_discard[4] = ".dynsym";
    sections_to_discard[5] = ".dynstr";
    sections_to_discard[6] = ".hash";
    sections_to_discard[7] = ".gnu.hash";
    sections_to_discard[8] = ".gnu.version";
    sections_to_discard[9] = ".gnu.version_r";
    sections_to_discard[10] = ".rela.dyn";
    sections_to_discard[11] = ".rela.init";
    sections_to_discard[12] = ".eh_frame";
    sections_to_discard[13] = ".dynamic";
    sections_to_discard[14] = ".got";
    sections_to_discard[15] = ".got.plt";
    sections_to_discard[16] = ".jcr";
    sections_to_discard[17] = ".init_array";
    sections_to_discard[18] = ".comment";
    sections_to_discard[19] = ".note.gnu.gold-version";
    number_of_sections_to_discard = length(sections_to_discard);

    # Sections the main rule looks into.
    sections_to_handle[1] = ".plt";
    sections_to_handle[2] = ".text";
    sections_to_handle[3] = ".data";
    sections_to_handle[4] = ".bss";
    number_of_sections_to_handle = length(sections_to_handle);

    # Labels inside .text that are listed as not belonging to the program.
    blocks_to_discard_in_text[1] = "<call_gmon_start>:";
    blocks_to_discard_in_text[2] = "<deregister_tm_clones>:";
    blocks_to_discard_in_text[3] = "<register_tm_clones>:";
    blocks_to_discard_in_text[4] = "<__do_global_dtors_aux>:";
    blocks_to_discard_in_text[5] = "<frame_dummy>:";
    blocks_to_discard_in_text[6] = "<__libc_csu_fini>:";
    blocks_to_discard_in_text[7] = "<__libc_csu_init>:";
    blocks_to_discard_in_text[8] = "<_start>:"; # !!!
    number_of_blocks_to_discard_in_text = length(blocks_to_discard_in_text);

    # Labels inside .text whose instructions are copied to the output.
    blocks_to_handle_in_text[1] = "main";
    number_of_blocks_to_handle_in_text = length(blocks_to_handle_in_text);

    # Label inside .data after which the data bytes are copied to the output.
    blocks_to_handle_in_data[1] = "__dso_handle";
    number_of_blocks_to_handle_in_data = length(blocks_to_handle_in_data);

    # External symbols: declared with `extern`, and substituted for their
    # "<name@plt>" references in the disassembly.
    externs_to_handle[1] = "printf";
    number_of_externs_to_handle = length(externs_to_handle);

    # 1-based character positions inside an objdump line.
    hexdump_start_byte = 11;        # first character of the hex byte dump
    disassembly_start_byte = 33;    # first character of the mnemonic

    current_section = "";

    # Consume the first two input lines; the second one is expected to carry
    # the "file format" banner.
    getline;
    getline;
    file_format_index = match($0, "file format elf64-x86-64");
    if (file_format_index > 0)
    {
        print "[bits 64]";
    }
    else
    {
        # Keep going as before, but do not stay silent about it: everything
        # below assumes x86-64 disassembly.
        print "objdump_to_asm: warning: input is not marked as elf64-x86-64" > "/dev/stderr";
    }
}
# Return `text` with every "<symbol@plt>" reference -- together with the
# target address objdump prints in front of it -- replaced by the bare symbol
# name, e.g. "call   400410 <printf@plt>" becomes "call   printf".
function strip_plt_reference(text, symbol)
{
    return gensub("([[:xdigit:]]+[[:space:]]+)?<" symbol "@plt>", symbol, "g", text);
}

# Turn the hex byte column of an objdump line ("61 62 63") into a list of
# `db` operands ("0x61, 0x62, 0x63").  Returns "" when the column is empty.
function hex_field_to_db_operands(hex_field,    operands)
{
    operands = gensub(/[[:alnum:]]+/, "0x&", "g", hex_field);
    gsub(/^[[:space:]]+|[[:space:]]+$/, "", operands);
    gsub(/[[:space:]]+/, ", ", operands);
    return operands;
}

# Main rule, run for every line of the objdump output:
#   1. track the section the line belongs to,
#   2. in .plt,  emit an `extern` for every configured external symbol,
#   3. in .text, copy the instructions of every configured block,
#   4. in .data, copy the bytes that follow the configured label as `db` lines.
{
    # Section headers.  Names are compared literally (index) so that the "."
    # in a section name is not treated as a regex wildcard.  A header line
    # carries nothing else of interest, so move on to the next line.
    for (i = 1; i <= number_of_sections_to_handle; i++)
    {
        if (index($0, disassembly_of_section_string sections_to_handle[i]) > 0)
        {
            current_section = sections_to_handle[i];
            next;
        }
    }
    for (i = 1; i <= number_of_sections_to_discard; i++)
    {
        if (index($0, disassembly_of_section_string sections_to_discard[i]) > 0)
        {
            current_section = sections_to_discard[i];
            next;
        }
    }

    if (current_section == ".plt")
    {
        for (i = 1; i <= number_of_externs_to_handle; i++)
        {
            if (index($0, "<" externs_to_handle[i] "@plt>:") > 0)
            {
                print "extern " externs_to_handle[i];
                next;
            }
        }
    }

    if (current_section == ".text")
    {
        for (i = 1; i <= number_of_blocks_to_handle_in_text; i++)
        {
            if (index($0, "<" blocks_to_handle_in_text[i] ">:") > 0)
            {
                print "section .text";
                print "global main";
                print blocks_to_handle_in_text[i] ":";

                # Copy instructions up to the blank line that ends the block.
                # The getline result is checked so that a block ending at
                # end of input cannot loop forever.
                while ((getline) > 0 && length($0) > 0)
                {
                    instruction = substr($0, disassembly_start_byte);
                    # "QWORD PTR ds:0x401965" -> "QWORD [0x401965]"
                    instruction = gensub(/PTR /, "", "g", instruction);
                    instruction = gensub(/(ds:)([a-z0-9]*)/, "[\\2]", "g", instruction);

                    for (j = 1; j <= number_of_externs_to_handle; j++)
                    {
                        if (index(instruction, "<" externs_to_handle[j] "@plt>") > 0)
                        {
                            instruction = strip_plt_reference(instruction, externs_to_handle[j]);
                            break;
                        }
                    }

                    # Lines that start with a data32 prefix are dropped.
                    if (index(instruction, "data32") != 1)
                    {
                        print instruction;
                    }
                }
                break;
            }
        }
    }

    if (current_section == ".data")
    {
        for (i = 1; i <= number_of_blocks_to_handle_in_data; i++)
        {
            if (index($0, "<" blocks_to_handle_in_data[i] ">:") > 0)
            {
                print "section .data";

                # The line directly after the label is read but never
                # converted; only the lines after it are.
                # NOTE(review): presumably that line holds the label's own
                # contents rather than program data -- confirm.
                if ((getline) > 0)
                {
                    # One `db` line per objdump line, up to the blank line
                    # that ends the block (or end of input).
                    while (length($0) > 0 && (getline) > 0)
                    {
                        hexdump_only = hex_field_to_db_operands(substr($0, hexdump_start_byte, (disassembly_start_byte - hexdump_start_byte)));
                        if (index(hexdump_only, "0x") > 0)
                        {
                            print "db " hexdump_only;
                        }
                    }
                }
                break;
            }
        }
    }
}
执行./objdump_to_asm objdump_output.txt >2nd_generation.asm
会生成下面的汇编文件。它可以用YASM汇编、用GCC链接。汇编并链接得到的可执行文件与原始文件并不完全相同:它的大小是6496字节,而原始可执行文件的大小为6568字节。
[bits 64]
; Second-generation source: the output of objdump_to_asm for the disassembly
; of the first-generation executable.
extern printf
section .text
global main
main:
push rbp
mov rbp,rsp
; Symbolic operands have become the absolute addresses seen in the
; first-generation binary.
; NOTE(review): these stay valid only if relinking places the data at the
; same addresses -- confirm.
lea rdi,[0x401958] ; format string
mov rsi,QWORD [0x401965] ; the 1337 value; 0x401965 - 0x401958 = 13 = size of the format string including its NUL
xor eax,eax
call printf
mov eax,0x3c ; 0x3c = 60
xor ebx,ebx
syscall
section .data
; Each db line below corresponds to one line of the objdump listing, which
; is why the bytes are grouped irregularly.
db 0x61 ; 'a'
db 0x62 ; 'b'
db 0x63, 0x64, 0x65, 0x66 ; "cdef"
db 0x20, 0x3d, 0x20, 0x25, 0x64, 0x0a ; " = %d" and newline
db 0x00, 0x39 ; NUL ending the format string; low byte of 1337 (0x00000539, little-endian)
db 0x05, 0x00, 0x00, 0x74, 0x65 ; remaining three bytes of 1337; then "te"
db 0x73, 0x74 ; "st"
db 0x20, 0x30 ; " 0"
db 0x31, 0x0a ; "1" and newline
db 0x00, 0x00 ; NUL ending "test 01\n", followed by one more zero byte
这很棒!谢谢你nrz! – computereasy
@computereasy不客气。 – nrz