之前我们使用的busybox是静态编译的,在init_post里我们跳过了interpreter的部分.这里补回来.
再次编译一个busybox,这次不勾选static build.
怎么查看一个可执行文件依赖的类库呢? 我们之前写了个print_elf64的小程序,用来打印出来可执行文件的信息.
program_header_table里的PT_INTERP指定的 /lib64/ld-linux-x86-64.so.2 肯定是需要的.
其它依赖的类库都在PT_DYNAMIC里,d_tag为DT_NEEDED的,其d_un的值是类库名称在DT_STRTAB(字符串表)里的字节偏移量.
@see http://www.sco.com/developers/gabi/latest/ch5.dynamic.html
如果DT_NEEDED的路径不是绝对路径,其查找有4种方法:
1. DT_RPATH (没有DT_RUNPATH的情况下)
2. LD_LIBRARY_PATH
3. DT_RUNPATH
4. /usr/lib或者其它指定的
./print_elf64 ../busybox.dyn
Program header table:
0: type = PT_PHDR RX offset = 0x40 size = 504
1: type = PT_INTERP R offset = 0x238 size = 28
/lib64/ld-linux-x86-64.so.2
2: type = PT_LOAD RX offset = 0 size = 654780 p_vaddr = 0x400000 p_memsz = 0x9fdbc
3: type = PT_LOAD RW offset = 0x9fe10 size = 3807 p_vaddr = 0x69fe10 p_memsz = 0x3370
4: type = PT_DYNAMIC RW offset = 0x9fe38 size = 416
d_tag = DT_NEEDED d_un = 0x1
d_tag = DT_NEEDED d_un = 0x49
d_tag = DT_INIT d_un = 0x405458
d_tag = DT_FINI d_un = 0x48440c
d_tag = unknown d_un = 0x400298
d_tag = DT_STRTAB d_un = 0x402480
d_tag = DT_SYMTAB d_un = 0x400350
hexdump -C -n 500 -s 0x2480 busybox.dyn
00002480 00 6c 69 62 6d 2e 73 6f 2e 36 00 5f 5f 67 6d 6f |.libm.so.6.__gmo|
00002490 6e 5f 73 74 61 72 74 5f 5f 00 5f 4a 76 5f 52 65 |n_start__._Jv_Re|
000024a0 67 69 73 74 65 72 43 6c 61 73 73 65 73 00 65 78 |gisterClasses.ex|
000024b0 70 00 73 69 6e 00 70 6f 77 00 61 74 61 6e 32 00 |p.sin.pow.atan2.|
000024c0 73 71 72 74 00 63 6f 73 00 6c 69 62 63 2e 73 6f |sqrt.cos.libc.so|
000024d0 2e 36 00 73 63 68 65 64 5f 67 65 74 5f 70 72 69 |.6.sched_get_pri|
000024e0 6f 72 69 74 79 5f 6d 69 6e 00 6b 6c 6f 67 63 74 |ority_min.klogct|
000024f0 6c 00 73 65 74 75 69 64 00 63 68 72 6f 6f 74 00 |l.setuid.chroot.|
00002500 73 74 72 63 61 73 65 73 74 72 00 73 6f 63 6b 65 |strcasestr.socke|
00002510 74 00 70 75 74 63 68 61 72 5f 75 6e 6c 6f 63 6b |t.putchar_unlock|
00002520 65 64 00 6d 6b 64 74 65 6d 70 00 67 6c 6f 62 66 |ed.mkdtemp.globf|
00002530 72 65 65 36 34 00 66 66 6c 75 73 68 00 70 75 74 |ree64.fflush.put|
00002540 63 5f 75 6e 6c 6f 63 6b 65 64 00 73 74 72 63 70 |c_unlocked.strcp|
00002550 79 00 73 69 67 73 75 73 70 65 6e 64 00 73 68 6d |y.sigsuspend.shm|
00002560 67 65 74 00 67 65 74 68 6f 73 74 69 64 00 67 6d |get.gethostid.gm|
00002570 74 69 6d 65 5f 72 00 66 63 68 6d 6f 64 00 5f 5f |time_r.fchmod.__|
00002580 70 72 69 6e 74 66 5f 63 68 6b 00 61 63 63 74 00 |printf_chk.acct.|
00002590 65 78 65 63 6c 00 66 6e 6d 61 74 63 68 00 67 6e |execl.fnmatch.gn|
000025a0 75 5f 64 65 76 5f 6d 61 6a 6f 72 00 65 78 65 63 |u_dev_major.exec|
000025b0 76 00 73 72 61 6e 64 00 74 74 79 6e 61 6d 65 5f |v.srand.ttyname_|
000025c0 72 00 73 74 72 73 69 67 6e 61 6c 00 73 74 72 6e |r.strsignal.strn|
000025d0 63 6d 70 00 69 6e 65 74 5f 61 74 6f 6e 00 6f 70 |cmp.inet_aton.op|
000025e0 74 69 6e 64 00 73 74 72 72 63 68 72 00 5f 5f 6c |tind.strrchr.__l|
000025f0 6f 6e 67 6a 6d 70 5f 63 68 6b 00 72 65 67 65 78 |ongjmp_chk.regex|
00002600 65 63 00 70 69 70 65 00 74 63 64 72 61 69 6e 00 |ec.pipe.tcdrain.|
00002610 63 66 6d 61 6b 65 72 61 77 00 73 68 6d 61 74 00 |cfmakeraw.shmat.|
00002620 63 6f 6e 6e 65 63 74 00 5f 5f 6f 70 65 6e 36 34 |connect.__open64|
00002630 5f 32 00 6c 75 74 69 6d 65 73 00 6d 75 6e 6c 6f |_2.lutimes.munlo|
00002640 63 6b 00 66 74 72 75 6e 63 61 74 65 36 34 00 6d |ck.ftruncate64.m|
00002650 6d 61 70 36 34 00 73 65 6d 63 74 6c 00 63 6c 6f |map64.semctl.clo|
00002660 73 65 64 69 72 00 69 6e 65 74 5f 6e 74 6f 61 00 |sedir.inet_ntoa.|
00002670 67 6c 6f 62 |glob|
00002674
DT_STRTAB + 0x1 = "libm.so.6"
DT_STRTAB + 0x49 = 0x24c9 = "libc.so.6"
我们新写个脚本 build-initrd-img-busybox-dyn.sh 重新制作一个initrd.img.gz
load_elf_interp(struct elfhdr *interp_elf_ex, struct file *interpreter, unsigned long *interp_map_addr, unsigned long no_base)
// 在load_elf_interp之前,新的pgd已经load到CR3里了,busybox的code,data,bss都已经好了.
/* ./print_elf64 /lib64/ld-linux-x86-64.so.2 可以看到
ELF header:
header size = 0x40
program header table at = 0x40, entry count = 7, entry size = 0x38
section header table at = 0x24120, entry count = 24, entry size = 0x40
e_entry = 0x16b0
e_type = 0x3 (ET_EXEC = 0x2, ET_DYN = 0x3, ET_CORE = 0x4)
e_machine = 0x3e (ET_X86_64 = 0x3e)
e_shstrndx = 23
Program header table:
0: type = PT_LOAD RX offset = 0 size = 138880 p_vaddr = 0 p_memsz = 0x21e80
1: type = PT_LOAD RW offset = 0x22b70 size = 5320 p_vaddr = 0x222b70 p_memsz = 0x1758
2: type = PT_DYNAMIC RW offset = 0x22e28 size = 400
d_tag = DT_SONAME d_un = 0x139
d_tag = DT_INIT_ARRAY d_un = 0x222b70
d_tag = DT_INIT_ARRAYSZ d_un = 0x8
d_tag = DT_HASH d_un = 0x1f0
d_tag = unknown d_un = 0x2a8
d_tag = DT_STRTAB d_un = 0x608
d_tag = DT_SYMTAB d_un = 0x380
d_tag = DT_STRSZ d_un = 0x17c
...
GOT:
0: 0x222e28
1: 0x224238
2: 0
3: 0
GOT.PLT:
0: 0x222e28
1: 0
2: 0
3: 0xa96
4: 0xaa6
5: 0xab6
6: 0xac6
7: 0xad6
8: 0xae6
PT_DYNAMIC 的 d_tag = DT_SONAME 指明了这个so文件的名称.
*/
load_elf_interp先把ld-linux-x86-64.so.2的program header table读取到内存里.
然后根据PT_LOAD计算出total_mapping_size = 0x222b70 + 0x1758 - 0 = 0x2242c8.
接下来把PT_LOAD segments都调用elf_map做好map.
这里的.so做map和前边的busybox做map有一点区别是,第一次做map时,map的地址范围要足够大,这也是要计算total_mapping_size的原因.
map_addr = elf_map(0, total_mapping_size=0x2242c8) = 0x7F566BF12000
elf_map(map_addr + p_vaddr, 0) = (0x7F566BF12000 + 0x222b70 = 0x7F566C134B70, 0)
其实就是先map一大块,然后后边的从这一块的当中开始map.
接下来一样要padzero bss,把memsz比filesz多出来的部分给做好map.
整理一下当前的vma:
0x400000-0x4a0000 (busybox.dyn的第一个PT_LOAD .text)
0x69f000-0x6a1000 (busybox.dyn的第二个PT_LOAD .data和一部分.bss)
0x6a1000-0x6a4000 (busybox.dyn的.bss)
0x7f566bf12000-0x7f566bf34000 (ld-linux-x86-64.so.2的第一个PT_LOAD .text)
0x7f566c134000-0x7f566c137000 (ld-linux-x86-64.so.2的第二个PT_LOAD .data和.bss)
0x7fff294a4000-0x7fff294c6000 (busybox.dyn的stack)
这里有一点让人不明白,为什么busybox.dyn的.bss要分成两个vma,而ld-linux-x86-64.so.2的一个就行了.
load_elf_interp返回了ld-linux-x86-64.so.2的map_addr,再加上ld-linux-x86-64.so.2的e_entry,就定下来了进入user space后第一条要执行的指令了.
0x7f566bf12000 + 0x16b0 = 0x7f566bf136b0
接下来要想搞清楚ld-linux-x86-64.so.2是怎么把其它依赖的类库加载进来以及怎么开始执行busybox.dyn的,就要看ld-linux-x86-64.so.2的实现了.
/lib64/ld-linux-x86-64.so.2其实就是/lib/x86_64-linux-gnu/ld-2.15.so,
dpkg -S ld-2.15.so
查找可知, ld-2.15.so是libc6里的.
apt-get source libc6
拿到源码.
man ld.so
// @see http://www.cs.virginia.edu/~dww4s/articles/ld_linux.html
// ld.so 这个make target定义在 eglibc-2.15/elf/Makefile#418
// @see https://sourceware.org/glibc/wiki/DynamicLoader
_dl_start的一开始,先得到自身被加载到内存的地址,再加上_DYNAMIC的地址,得到PT_DYNAMIC segment在内存里的实际地址.
// x86_64的System V Application Binary Interface里说了,GOT的第一个entry(GOT[0])里保存着Dynamic Section的地址. symbol _DYNAMIC也指向这个地址.
// ld-linux-x86-64.so.2的symbol table里没有 _DYNAMIC.
// 从上边print_elf64打印出来的信息我们知道,ld-linux-x86-64.so.2的第二个PT_LOAD,也就是.data segment被加载到内存的vaddr offset = 0x222b70, filesz = 5320,
// 所以.data segment在0x222b70 + 5320 = 0x224038处结束. 而 Dynamic Section 的 vaddr = 0x222e28. 在 .data 里.
然后调用elf_get_dynamic_info()读取PT_DYNAMIC segments的内容,我们在写print_elf64这个小程序时已经读取过.
glibc的代码太难看了.不看了.
@see http://www.airs.com/blog/page/4?s=linkers
为了更好的理解shared objects,我们分析个小文件看看.
// a.c
#include <stdint.h>
uint64_t global1 = 0x11;
uint64_t global2 = 0x22;
uint64_t global3 = 0x33;
int sum_global(void)
{
return global1 + global2 + global3;
}
int sum_global2(void)
{
return sum_global() + sum_global();
}
int sum_global4(void)
{
return sum_global2() + sum_global2();
}
// 编译成a.so
// gcc -fpic -shared -nostdlib -o a.so a.c
./print_elf64 a.so
ELF header:
header size = 0x40
program header table at = 0x40, entry count = 7, entry size = 0x38
section header table at = 0x10f8, entry count = 19, entry size = 0x40
e_entry = 0x440
e_type = 0x3 (ET_EXEC = 0x2, ET_DYN = 0x3, ET_CORE = 0x4)
e_machine = 0x3e (ET_X86_64 = 0x3e)
e_shstrndx = 16
Program header table:
0: type = PT_LOAD RX offset = 0 size = 1352 p_vaddr = 0 p_memsz = 0x548
1: type = PT_LOAD RW offset = 0xeb0 size = 376 p_vaddr = 0x200eb0 p_memsz = 0x178
2: type = PT_DYNAMIC RW offset = 0xeb0 size = 288
d_tag = unknown d_un = 0x1f0
d_tag = DT_STRTAB d_un = 0x340
d_tag = DT_SYMTAB d_un = 0x238
d_tag = DT_STRSZ d_un = 0x4c
d_tag = DT_SYMENT d_un = 0x18
d_tag = DT_PLTGOT d_un = 0x200fe8
d_tag = DT_PLTRELSZ d_un = 0x30
d_tag = DT_PLTREL d_un = 0x7
d_tag = DT_JMPREL d_un = 0x3d8
d_tag = DT_RELA d_un = 0x390
d_tag = DT_RELASZ d_un = 0x48
d_tag = DT_RELAENT d_un = 0x18
3: type = PT_NOTE R offset = 0x1c8 size = 36
name size = 4, desc size = 20, type = 3
name: 0x47 (G) 0x4e (N) 0x55 (U) 0
desc: 0xf2 0xbd 0x69 0x58 0xd1 0x8f 0x6c 0xc 0x44 0x24 0x4d 0xff 0x2e 0xe4 0xe8 0xe2 0x24 0x3d 0xb 0x16
4: type = PT_GNU_EH_FRAME R offset = 0x4a8 size = 36
5: type = PT_GNU_STACK RW offset = 0 size = 0
6: type = PT_GNU_RELRO R offset = 0xeb0 size = 336
Find section name string table:
offset = 0x1052, size = 160
Section header table:
0: name = type = SHT_NULL offset = 0 size = 0
1: name = .note.gnu.build-id type = SHT_NOTE offset = 0x1c8 size = 36
2: name = .gnu.hash type = SHT_GNU_HASH offset = 0x1f0 size = 72
3: name = .dynsym type = SHT_DYNSYM offset = 0x238 size = 264
4: name = .dynstr type = SHT_STRTAB offset = 0x340 size = 76
5: name = .rela.dyn type = SHT_RELA offset = 0x390 size = 72
sh_link = .dynsym, sh_info =
r_offset = 0x200fd8, r_info = (SYM = 7, TYPE = 0x6), r_addend = 0
r_offset = 0x200fe0, r_info = (SYM = 3, TYPE = 0x6), r_addend = 0
6: name = .rela.plt type = SHT_RELA offset = 0x3d8 size = 48
sh_link = .dynsym, sh_info = .plt
r_offset = 0x201008, r_info = (SYM = 10, TYPE = 0x7), r_addend = 0
7: name = .plt type = SHT_PROGBITS offset = 0x410 size = 48
8: name = .text type = SHT_PROGBITS offset = 0x440 size = 102
9: name = .eh_frame_hdr type = SHT_PROGBITS offset = 0x4a8 size = 36
10: name = .eh_frame type = SHT_PROGBITS offset = 0x4d0 size = 120
11: name = .dynamic type = SHT_DYNAMIC offset = 0xeb0 size = 288
12: name = .got type = SHT_PROGBITS offset = 0xfd0 size = 24
13: name = .got.plt type = SHT_PROGBITS offset = 0xfe8 size = 40
14: name = .data type = SHT_PROGBITS offset = 0x1010 size = 24
15: name = .comment type = SHT_PROGBITS offset = 0x1028 size = 42
16: name = .shstrtab type = SHT_STRTAB offset = 0x1052 size = 160
17: name = .symtab type = SHT_SYMTAB offset = 0x15b8 size = 672
18: name = .strtab type = SHT_STRTAB offset = 0x1858 size = 119
Found symbol table:
0: name = value = 0 size = 0 type = STT_NOTYPE binding = STB_LOCAL defined section index = SHN_UNDEF
1: name = value = 0x1c8 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .note.gnu.build-id
2: name = value = 0x1f0 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .gnu.hash
3: name = value = 0x238 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .dynsym
4: name = value = 0x340 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .dynstr
5: name = value = 0x390 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .rela.dyn
6: name = value = 0x3d8 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .rela.plt
7: name = value = 0x410 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .plt
8: name = value = 0x440 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .text
9: name = value = 0x4a8 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .eh_frame_hdr
10: name = value = 0x4d0 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .eh_frame
11: name = value = 0x200eb0 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .dynamic
12: name = value = 0x200fd0 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .got
13: name = value = 0x200fe8 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .got.plt
14: name = value = 0x201010 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .data
15: name = value = 0 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .comment
16: name = a.c value = 0 size = 0 type = STT_FILE binding = STB_LOCAL defined section index = SHN_ABS
17: name = _DYNAMIC value = 0x200eb0 size = 0 type = STT_OBJECT binding = STB_LOCAL defined section index = SHN_ABS
18: name = _GLOBAL_OFFSET_TABLE_ value = 0x200fe8 size = 0 type = STT_OBJECT binding = STB_LOCAL defined section index = SHN_ABS
19: name = global2 value = 0x201018 size = 0x8 type = STT_OBJECT binding = STB_GLOBAL defined section index = .data
20: name = sum_global4 value = 0x488 size = 0x1e type = STT_FUNC binding = STB_GLOBAL defined section index = .text
21: name = sum_global value = 0x440 size = 0x2a type = STT_FUNC binding = STB_GLOBAL defined section index = .text
22: name = global1 value = 0x201010 size = 0x8 type = STT_OBJECT binding = STB_GLOBAL defined section index = .data
23: name = global3 value = 0x201020 size = 0x8 type = STT_OBJECT binding = STB_GLOBAL defined section index = .data
24: name = __bss_start value = 0x201028 size = 0 type = STT_NOTYPE binding = STB_GLOBAL defined section index = SHN_ABS
25: name = _edata value = 0x201028 size = 0 type = STT_NOTYPE binding = STB_GLOBAL defined section index = SHN_ABS
26: name = _end value = 0x201028 size = 0 type = STT_NOTYPE binding = STB_GLOBAL defined section index = SHN_ABS
27: name = sum_global2 value = 0x46a size = 0x1e type = STT_FUNC binding = STB_GLOBAL defined section index = .text
Found dynamic linking symbol table:
0: name = value = 0 size = 0 type = STT_NOTYPE binding = STB_LOCAL defined section index = SHN_UNDEF
1: name = value = 0x440 size = 0 type = STT_SECTION binding = STB_LOCAL defined section index = .text
2: name = sum_global value = 0x440 size = 0x2a type = STT_FUNC binding = STB_GLOBAL defined section index = .text
3: name = global3 value = 0x201020 size = 0x8 type = STT_OBJECT binding = STB_GLOBAL defined section index = .data
4: name = _edata value = 0x201028 size = 0 type = STT_NOTYPE binding = STB_GLOBAL defined section index = SHN_ABS
5: name = _end value = 0x201028 size = 0 type = STT_NOTYPE binding = STB_GLOBAL defined section index = SHN_ABS
6: name = sum_global4 value = 0x488 size = 0x1e type = STT_FUNC binding = STB_GLOBAL defined section index = .text
7: name = global1 value = 0x201010 size = 0x8 type = STT_OBJECT binding = STB_GLOBAL defined section index = .data
8: name = __bss_start value = 0x201028 size = 0 type = STT_NOTYPE binding = STB_GLOBAL defined section index = SHN_ABS
9: name = global2 value = 0x201018 size = 0x8 type = STT_OBJECT binding = STB_GLOBAL defined section index = .data
10: name = sum_global2 value = 0x46a size = 0x1e type = STT_FUNC binding = STB_GLOBAL defined section index = .text
GOT:
0: 0
1: 0
2: 0
GOT.PLT:
0: 0x200eb0
1: 0
2: 0
3: 0x426
4: 0x436
readelf --relocs a.so
Relocation section '.rela.dyn' at offset 0x390 contains 3 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000200fd0 000900000006 R_X86_64_GLOB_DAT 0000000000201018 global2 + 0
000000200fd8 000700000006 R_X86_64_GLOB_DAT 0000000000201010 global1 + 0
000000200fe0 000300000006 R_X86_64_GLOB_DAT 0000000000201020 global3 + 0
Relocation section '.rela.plt' at offset 0x3d8 contains 2 entries:
Offset Info Type Sym. Value Sym. Name + Addend
000000201000 000200000007 R_X86_64_JUMP_SLO 0000000000000440 sum_global + 0
000000201008 000a00000007 R_X86_64_JUMP_SLO 000000000000046a sum_global2 + 0
objdump -S a.so
a.so: file format elf64-x86-64
Disassembly of section .plt:
0000000000000410 <sum_global@plt-0x10>:
410: ff 35 da 0b 20 00 pushq 0x200bda(%rip) # 200ff0 <_GLOBAL_OFFSET_TABLE_+0x8>
416: ff 25 dc 0b 20 00 jmpq *0x200bdc(%rip) # 200ff8 <_GLOBAL_OFFSET_TABLE_+0x10>
41c: 0f 1f 40 00 nopl 0x0(%rax)
0000000000000420 <sum_global@plt>:
420: ff 25 da 0b 20 00 jmpq *0x200bda(%rip) # 201000 <_GLOBAL_OFFSET_TABLE_+0x18>
426: 68 00 00 00 00 pushq $0x0
42b: e9 e0 ff ff ff jmpq 410 <sum_global@plt-0x10>
0000000000000430 <sum_global2@plt>:
430: ff 25 d2 0b 20 00 jmpq *0x200bd2(%rip) # 201008 <_GLOBAL_OFFSET_TABLE_+0x20>
436: 68 01 00 00 00 pushq $0x1
43b: e9 d0 ff ff ff jmpq 410 <sum_global@plt-0x10>
Disassembly of section .text:
0000000000000440 <sum_global>:
440: 55 push %rbp
441: 48 89 e5 mov %rsp,%rbp
444: 48 8b 05 8d 0b 20 00 mov 0x200b8d(%rip),%rax # 200fd8 <_DYNAMIC+0x128>
44b: 48 8b 00 mov (%rax),%rax
44e: 89 c2 mov %eax,%edx
450: 48 8b 05 79 0b 20 00 mov 0x200b79(%rip),%rax # 200fd0 <_DYNAMIC+0x120>
457: 48 8b 00 mov (%rax),%rax
45a: 01 c2 add %eax,%edx
45c: 48 8b 05 7d 0b 20 00 mov 0x200b7d(%rip),%rax # 200fe0 <_DYNAMIC+0x130>
463: 48 8b 00 mov (%rax),%rax
466: 01 d0 add %edx,%eax
468: 5d pop %rbp
469: c3 retq
000000000000046a <sum_global2>:
46a: 55 push %rbp
46b: 48 89 e5 mov %rsp,%rbp
46e: 53 push %rbx
46f: 48 83 ec 08 sub $0x8,%rsp
473: e8 a8 ff ff ff callq 420 <sum_global@plt>
478: 89 c3 mov %eax,%ebx
47a: e8 a1 ff ff ff callq 420 <sum_global@plt>
47f: 01 d8 add %ebx,%eax
481: 48 83 c4 08 add $0x8,%rsp
485: 5b pop %rbx
486: 5d pop %rbp
487: c3 retq
0000000000000488 <sum_global4>:
488: 55 push %rbp
489: 48 89 e5 mov %rsp,%rbp
48c: 53 push %rbx
48d: 48 83 ec 08 sub $0x8,%rsp
491: e8 9a ff ff ff callq 430 <sum_global2@plt>
496: 89 c3 mov %eax,%ebx
498: e8 93 ff ff ff callq 430 <sum_global2@plt>
49d: 01 d8 add %ebx,%eax
49f: 48 83 c4 08 add $0x8,%rsp
4a3: 5b pop %rbx
4a4: 5d pop %rbp
4a5: c3 retq
从print_elf64的结果可以看出,a.so的第二个PT_LOAD在文件中的offset和PT_DYNAMIC的offset是一样的,都是0xeb0,也就是说:
PT_LOAD的一开始是288字节的dynamic section, 0xeb0 + 288 = 0xfd0.
.got在文件中的offset就是0xfd0,所以接着下边的数据是.got, .got的大小是24字节, 0xfd0+24=0xfe8.
.got.plt在文件中的offset是0xfe8,所以再接下来的数据是.got.plt, 它的大小是40字节. 0xfe8+40=0x1010
.data在文件中的offset是0x1010,大小是24字节, 288 + 24 + 40 + 24 = 376, 正好是PT_LOAD的大小.
_______________ a.so的第二个PT_LOAD,offset=0xeb0
| |
| |
| | 288字节的PT_DYNAMIC, end = 0xfd0
| |
|_______________|
| |
| | 24字节的got, end = 0xfe8
|_______________|
| |
| | 40字节的got.plt, end = 0x1010
|_______________|
| |
| | 24字节的.data
|_______________|
平时我们接触到C语言的数组时,下标都是要从0开始,没有负数的,实际上也可以是负数,它们都是指针加加减减而已.
GOT(Global Offset Table)就是个例子.
在abi.pdf里可以看到GOT是这样声明的:
extern Elf64_Addr _GLOBAL_OFFSET_TABLE_ [];
The symbol _GLOBAL_OFFSET_TABLE_ may reside in the middle of the .got section, allowing both negative and non-negative offsets into the array of addresses.
a.so的例子验证了这一说法: _GLOBAL_OFFSET_TABLE_ = 0x200fe8, 这个位置正好是.got的结束,.got.plt的开始.
这也说明了.got和.got.plt只不过是把GOT给分成了两块,命了两个名字而已,实际上还是一个GOT. 我们来看下GOT的内容:
GOT[-3] = 0 // fd0
GOT[-2] = 0 // fd8
GOT[-1] = 0 // fe0
GOT[0] = 0x200eb0 // fe8
GOT[1] = 0 // ff0
GOT[2] = 0 // ff8
GOT[3] = 0x426 // 1000
GOT[4] = 0x436 // 1008
abi.pdf里说GOT[0]指向了 dynamic section, 1和2 are reserved.
接下来看一下sum_global的实现:
444: 48 8b 05 8d 0b 20 00 mov 0x200b8d(%rip),%rax # 200fd8 <_DYNAMIC+0x128>
44b: 48 8b 00 mov (%rax),%rax
44e: 89 c2 mov %eax,%edx
450: 48 8b 05 79 0b 20 00 mov 0x200b79(%rip),%rax # 200fd0 <_DYNAMIC+0x120>
457: 48 8b 00 mov (%rax),%rax
45a: 01 c2 add %eax,%edx
45c: 48 8b 05 7d 0b 20 00 mov 0x200b7d(%rip),%rax # 200fe0 <_DYNAMIC+0x130>
463: 48 8b 00 mov (%rax),%rax
466: 01 d0 add %edx,%eax
fd0, fd8, fe0分别对应着GOT[-3], GOT[-2], GOT[-1], 汇编代码把GOT[x]指向的内存地址里的内容加和到%eax里,完成了3个global的求和.
这样的话,当a.so加载到内存的时候,GOT[-3,-2,-1]的值要从0分别修改为 load_addr + 0x201018 (global2), load_addr + 0x201010 (global1), load_addr + 0x201020 (global3).
这时候看下 .rela.dyn
Offset Sym. Value Sym. Name + Addend
000000200fd0 0000000000201018 global2 + 0
000000200fd8 0000000000201010 global1 + 0
000000200fe0 0000000000201020 global3 + 0
Ok, 这下明白了,当a.so加载时,啥都不用管,直接读取.rela.dyn的内容,把 load_addr + Offset 处的值修改成 load_addr + Sym. Value, 就可以了.
再看下 sum_global2 的实现:
473: e8 a8 ff ff ff callq 420 <sum_global@plt>
478: 89 c3 mov %eax,%ebx
47a: e8 a1 ff ff ff callq 420 <sum_global@plt>
47f: 01 d8 add %ebx,%eax
0000000000000420 <sum_global@plt>:
420: ff 25 da 0b 20 00 jmpq *0x200bda(%rip) # 201000 <_GLOBAL_OFFSET_TABLE_+0x18>
426: 68 00 00 00 00 pushq $0x0
42b: e9 e0 ff ff ff jmpq 410 <sum_global@plt-0x10>
调用sum_global时其实是调用了sum_global@plt,进而跳转到了GOT[3]指向的内存地址执行. GOT[3]现在指向0x426,也就是jmp的下一条指令.
0x426把0压入栈,然后跳到0x410执行.
0000000000000410 <sum_global@plt-0x10>:
410: ff 35 da 0b 20 00 pushq 0x200bda(%rip) # 200ff0 <_GLOBAL_OFFSET_TABLE_+0x8>
416: ff 25 dc 0b 20 00 jmpq *0x200bdc(%rip) # 200ff8 <_GLOBAL_OFFSET_TABLE_+0x10>
41c: 0f 1f 40 00 nopl 0x0(%rax)
0x410把GOT[1]压入栈,然后跳到GOT[2]指向的内存地址执行.相当于调用 GOT[2](GOT[1], 0).
同样的,调用sum_global2相当于 GOT[2](GOT[1], 1).
现在需要确定的就是GOT[2]和GOT[1]的内容了. 根据之前了解到的信息,应该是要调用 ld-linux-x86-64.so.2 里的某个函数.这个函数的第一个参数是GOT[1],第二个参数是 .rela.plt 里的index.
看下.rela.plt:
Offset Sym. Value Sym. Name + Addend
000000201000 0000000000000440 sum_global + 0
000000201008 000000000000046a sum_global2 + 0
sum_global定义在0x440处,sum_global2定义在0x46a处,也就是说,
GOT[2](GOT[1], 0)要把GOT[3]修改为 load_addr + 0x440,
GOT[2](GOT[1], 1)要把GOT[4]修改为 load_addr + 0x46a,
这样下次再call sum_global@plt时,直接就跳到了sum_global里开始执行了.(比正常调用函数多了一个jmp指令)
顺过来再整理一遍:
当把代码编译成shared object时,遇到全局变量,就在GOT里预留个位置,并在.rela.dyn里加一条记录,说明要把GOT里预留的这个位置处填上什么值.
遇到function,也在GOT里预留个位置,并在.rela.plt里加一条记录,说明要把GOT里预留的这个位置处填上什么值,然后再生成对应的一段代码
jmp *GOT[N]
push N
jmp plt_entry
然后把这段代码里push N的地址填写到GOT[N]里.
程序加载时,kernel加载程序本身,再加载ld-linux-x86-64.so.2,然后转到ld-linux-x86-64.so.2里执行.
ld-linux-x86-64.so.2把其它.so文件加载进来,并按照.so文件.rela.dyn里说的,修改GOT里全局变量的地址.函数的地址保持不变.
当程序调用到.so文件里的函数时,还不清楚.写个程序看看.
extern int sum_global(void);
extern int sum_global2(void);
extern int sum_global4(void);
#include <stdio.h>
int main(void)
{
printf("%d\n", sum_global() + sum_global2() + sum_global4());
return 0;
}
gcc -fpic -shared -o liba.so a.c
gcc b.c -L. -la -Wl,-rpath,/tmp/
./a.out
objdump -S a.out 一看,还是一样的,a.out里调用的sum_global函数都有对应的plt,最后还是调用GOT[2](GOT[1], index)
现在的关键就在于搞清楚GOT[2] GOT[1]了.
接着看libc.
sysdeps/x86_64/dl-machine.h#145 _start
movq %rsp, %rdi
call _dl_start
elf/rtld.c#367 _dl_start(arg)
l_addr = elf_machine_load_address ();
l_ld = (void *) bootstrap_map.l_addr + elf_machine_dynamic ();
elf_get_dynamic_info (&bootstrap_map, NULL); // elf/dynamic-link.h#104 got PT_DYNAMIC d_tags
_dl_start_final(arg, &info);
elf/rtld.c#275 _dl_start_final(arg, &info);
/* Call the OS-dependent function to set up life so we can do things like
file access. It will call `dl_main' (below) to do all the real work
of the dynamic linker, and then unwind our frame and run the user
entry point on the same stack we entered on.*/
_dl_sysdep_start (arg, &dl_main);
elf/dl-sysdep.c#86 _dl_sysdep_start(arg, &dl_main)
DL_FIND_ARG_COMPONENTS() // 在栈上找到elf_info,从而拿到了要执行的程序的信息
(*dl_main) (phdr, phnum, &user_entry, _dl_auxv); // phdr,phnum是要执行的程序的program header,不是ld.so的
elf/rtld.c#892 dl_main()
dl_main过了一遍program header table,记下来各个segment的信息.
检查环境变量LD_PRELOAD和文件/etc/ld.so.preload,如果指定了要加载的so文件,就加载
// Load all the libraries specified by DT_NEEDED entries
_dl_map_object_deps (main_map, preloads, npreloads, mode == trace, 0);
// Now we have all the objects loaded. Relocate them all except for the dynamic linker itself.
_dl_relocate_object() // elf/dl-reloc.c#152
// Do the actual relocation of the object's GOT and other data.
ELF_DYNAMIC_RELOCATE (l, lazy, consider_profiling, skip_ifunc);
elf_machine_runtime_setup()
/* The GOT entries for functions in the PLT have not yet been filled
in. Their initial contents will arrange when called to push an
offset into the .rel.plt section, push _GLOBAL_OFFSET_TABLE_[1],
and then jump to _GLOBAL_OFFSET_TABLE_[2]. */
got = (Elf64_Addr *) D_PTR (l, l_info[DT_PLTGOT]);
got[1] = (Elf64_Addr) l; /* Identify this shared object. */
/* The got[2] entry contains the address of a function which gets
called to get the addresses of a so far unresolved function and
jump to it. */
/* This function will get called to fix up the GOT entry indicated by
the offset on the stack, and then jump to the resolved address. */
got[2] = (Elf64_Addr) &_dl_runtime_resolve;
ELF_DYNAMIC_DO_REL() => elf_dynamic_do_Rel()
ELF_DYNAMIC_DO_RELA() => elf_dynamic_do_Rela()
elf_dynamic_do_Rel()
OK, 现在搞明白GOT[2](GOT[1], index)里GOT[2]和GOT[1]都是什么了.
GOT[2] = &_dl_runtime_resolve
GOT[1] = Identify this shared object.