# 数组

## 18.1 小例子

``````#include <stdio.h>
int main()
{
int a[20];
int i;
for (i=0; i<20; i++)
a[i]=i*2;
for (i=0; i<20; i++)
printf ("a[%d]=%d", i, a[i]);
return 0;
};
``````

### 18.1.1 x86

#### MSVC

Listing 18.1: MSVC

``````_TEXT   SEGMENT
_i\$ = -84                                   ; size = 4
_a\$ = -80                                   ; size = 80
_main       PROC
push    ebp
mov     ebp, esp
sub     esp, 84         ; 00000054H
mov     DWORD PTR _i\$[ebp], 0
jmp     SHORT \$LN6@main
\$LN5@main:
mov     eax, DWORD PTR _i\$[ebp]
mov     DWORD PTR _i\$[ebp], eax
\$LN6@main:
cmp     DWORD PTR _i\$[ebp], 20 ; 00000014H
jge     SHORT \$LN4@main
mov     ecx, DWORD PTR _i\$[ebp]
shl     ecx, 1
mov     edx, DWORD PTR _i\$[ebp]
mov     DWORD PTR _a\$[ebp+edx*4], ecx
jmp     SHORT \$LN5@main
\$LN4@main:
mov     DWORD PTR _i\$[ebp], 0
jmp     SHORT \$LN3@main
\$LN2@main:
mov     eax, DWORD PTR _i\$[ebp]
mov     DWORD PTR _i\$[ebp], eax
\$LN3@main:
cmp     DWORD PTR _i\$[ebp], 20 ; 00000014H
jge     SHORT \$LN1@main
mov     ecx, DWORD PTR _i\$[ebp]
mov     edx, DWORD PTR _a\$[ebp+ecx*4]
push    edx
mov     eax, DWORD PTR _i\$[ebp]
push    eax
push    OFFSET \$SG2463
call    _printf
jmp     SHORT \$LN2@main
\$LN1@main:
xor eax, eax
mov esp, ebp
pop ebp
ret 0
_main ENDP
``````

Let’s try this example in OllyDbg 缺漏 =

#### GCC

GCC 4.4.1编译后为：

Listing 18.2: GCC 4.4.1

``````            public main
main        proc near                       ; DATA XREF: _start+17

var_70      = dword ptr -70h
var_6C      = dword ptr -6Ch
var_68      = dword ptr -68h
i_2         = dword ptr -54h
i           = dword ptr -4

push    ebp
mov     ebp, esp
and     esp, 0FFFFFFF0h
sub     esp, 70h
mov     [esp+70h+i], 0 ; i=0
jmp     short loc_804840A
loc_80483F7:
mov     eax, [esp+70h+i]
mov     edx, [esp+70h+i]
mov     [esp+eax*4+70h+i_2], edx
loc_804840A:
cmp     [esp+70h+i], 13h
jle     short loc_80483F7
mov     [esp+70h+i], 0
jmp     short loc_8048441
loc_804841B:
mov     eax, [esp+70h+i]
mov     edx, [esp+eax*4+70h+i_2]
mov     eax, offset aADD ; "a[%d]=%d
"
mov     [esp+70h+var_68], edx
mov     edx, [esp+70h+i]
mov     [esp+70h+var_6C], edx
mov     [esp+70h+var_70], eax
call    _printf
loc_8048441:
cmp     [esp+70h+i], 13h
jle     short loc_804841B
mov     eax, 0
leave
retn
main        endp
``````

### 18.1.2 ARM

#### ARM + Non-optimizing Keil + ARM mode

``````        EXPORT _main
_main
STMFD   SP!, {R4,LR}
SUB     SP, SP, #0x50           ; allocate place for 20 int variables
; first loop
MOV     R4, #0                  ; i
B       loc_4A0
loc_494
MOV     R0, R4,LSL#1            ; R0=R4*2
STR     R0, [SP,R4,LSL#2]       ; store R0 to SP+R4<<2 (same as SP+R4*4)
ADD     R4, R4, #1              ; i=i+1
loc_4A0
CMP     R4, #20                 ; i<20?
BLT     loc_494                 ; yes, run loop body again
; second loop
MOV     R4, #0                  ; i
B       loc_4C4
loc_4B0
LDR     R2, [SP,R4,LSL#2]       ; (second printf argument) R2=*(SP+R4<<4) (same as *(SP+R4*4))
MOV     R1, R4                  ; (first printf argument) R1=i
"
BL      __2printf
ADD     R4, R4, #1              ; i=i+1
loc_4C4
CMP     R4, #20                 ; i<20?
BLT     loc_4B0                 ; yes, run loop body again
MOV     R0, #0                  ; value to return
ADD     SP, SP, #0x50           ; deallocate place, allocated for 20 int variables
LDMFD   SP!, {R4,PC}
``````

int类型长度为32bits即4字节，20个int变量需要80（0x50）字节，因此“sub sp,sp,#0x50”指令为在栈上分配存储空间。 两个循环迭代器i被存储在R4寄存器中。 值i2被写入数组，通过将i值左移1位实现乘以2的效果，整个过程通过”MOV R0,R4,LSL#1指令来实现。 “STR R0, [SP,R4,LSL#2]”把R0内容写入数组。过程为：SP指向数组开始，R4是i，i左移2位相当于乘以4，即(SP+R44)=R0。 第二个loop的“LDR R2, [SP,R4,LSL#2]”从数组读取数值到寄存器，R2=(SP+R4*4)。

#### ARM + Keil + thumb 模式优化后

``````_main
PUSH    {R4,R5,LR}
; allocate place for 20 int variables + one more variable
SUB     SP, SP, #0x54
; first loop
MOVS    R0, #0                  ; i
MOV     R5, SP                  ; pointer to first array element
loc_1CE
LSLS    R1, R0, #1              ; R1=i<<1 (same as i*2)
LSLS    R2, R0, #2              ; R2=i<<2 (same as i*4)
ADDS    R0, R0, #1              ; i=i+1
CMP     R0, #20                 ; i<20?
STR     R1, [R5,R2]             ; store R1 to *(R5+R2) (same R5+i*4)
BLT     loc_1CE                 ; yes, i<20, run loop body again
; second loop
MOVS    R4, #0                  ; i=0
loc_1DC
LSLS    R0, R4, #2              ; R0=i<<2 (same as i*4)
LDR     R2, [R5,R0]             ; load from *(R5+R0) (same as R5+i*4)
MOVS    R1, R4
"
BL      __2printf
ADDS    R4, R4, #1              ; i=i+1
CMP     R4, #20                 ; i<20?
BLT     loc_1DC                 ; yes, i<20, run loop body again
MOVS    R0, #0                  ; value to return
; deallocate place, allocated for 20 int variables + one more variable
POP     {R4,R5,PC}
``````

Thumb代码也是非常类似的。Thumb模式计算数组偏移的移位操作使用特定的指令LSLS。 编译器在堆栈中申请的数组空间更大，但是最后4个字节的空间未使用。

## 18.2 缓冲区溢出

### 18.2.1 读取外部数组的边界

Array[index]中index指代数组索引，仔细观察下面的代码，你可能注意到代码没有index是否小于20。如果index大于20？这是C/C++经常被批评的特征。 以下代码可以成功编译可以工作：

``````#include <stdio.h>
int main()
{
int a[20];
int i;
for (i=0; i<20; i++)
a[i]=i*2;
printf ("a[100]=%d", a[100]);
return 0;
};
``````

``````_TEXT   SEGMENT
_i\$ = -84                               ; size = 4
_a\$ = -80                               ; size = 80
_main       PROC
push    ebp
mov     ebp, esp
sub     esp, 84                 ; 00000054H
mov     DWORD PTR _i\$[ebp], 0
jmp     SHORT \$LN3@main
\$LN2@main:
mov     eax, DWORD PTR _i\$[ebp]
mov     DWORD PTR _i\$[ebp], eax
\$LN3@main:
cmp     DWORD PTR _i\$[ebp], 20  ; 00000014H
jge     SHORT \$LN1@main
mov     ecx, DWORD PTR _i\$[ebp]
shl     ecx, 1
mov     edx, DWORD PTR _i\$[ebp]
mov     DWORD PTR _a\$[ebp+edx*4], ecx
jmp     SHORT \$LN2@main
\$LN1@main:
mov     eax, DWORD PTR _a\$[ebp+400]
push    eax
push    OFFSET \$SG2460
call    _printf
xor     eax, eax
mov     esp, ebp
pop     ebp
ret     0
_main       ENDP
``````

``````#include <stdio.h>
int main()
{
int a[20];
int i;

for (i=0; i<30; i++)
a[i]=i;

return 0;
};
``````

``````_TEXT   SEGMENT
_i\$ = -84                                   ; size = 4
_a\$ = -80                                   ; size = 80
_main       PROC
push    ebp
mov     ebp, esp
sub     esp, 84 ; 00000054H
mov     DWORD PTR _i\$[ebp], 0
jmp     SHORT \$LN3@main
\$LN2@main:
mov     eax, DWORD PTR _i\$[ebp]
mov     DWORD PTR _i\$[ebp], eax
\$LN3@main:
cmp     DWORD PTR _i\$[ebp], 30 ; 0000001eH
jge     SHORT \$LN1@main
mov     ecx, DWORD PTR _i\$[ebp]
mov     edx, DWORD PTR _i\$[ebp] ; that instruction is obviously redundant
mov     DWORD PTR _a\$[ebp+ecx*4], edx ; ECX could be used as second operand here instead
jmp     SHORT \$LN2@main
\$LN1@main:
xor     eax, eax
mov     esp, ebp
pop     ebp
ret     0
_main       ENDP
``````

``````generic tracer 0.4 (WIN32), http://conus.info/gt

New process: C:PRJ...1.exe, PID=7988
EXCEPTION_ACCESS_VIOLATION: 0x15 (<symbol (0x15) is in unknown module>), ExceptionInformation
[0]=8
EAX=0x00000000 EBX=0x7EFDE000 ECX=0x0000001D EDX=0x0000001D
ESI=0x00000000 EDI=0x00000000 EBP=0x00000014 ESP=0x0018FF48
EIP=0x00000015
FLAGS=PF ZF IF RF
PID=7988|Process exit, return code -1073740791
``````

``````ESP
ESP+4
ESP+84
ESP+88
4 bytes for i
80 bytes for a[20] array
saved EBP value
``````

``````            public main
main        proc near

a           = dword ptr -54h
i           = dword ptr -4

push    ebp
mov     ebp, esp
sub     esp, 60h
mov     [ebp+i], 0
jmp     short loc_80483D1
loc_80483C3:
mov     eax, [ebp+i]
mov     edx, [ebp+i]
mov     [ebp+eax*4+a], edx
loc_80483D1:
cmp     [ebp+i], 1Dh
jle     short loc_80483C3
mov     eax, 0
leave
retn
main        endp
``````

``````(gdb) r
Starting program: /home/dennis/RE/1

Program received signal SIGSEGV, Segmentation fault.
0x00000016 in ?? ()
(gdb) info registers
eax         0x0                 0
ecx         0xd2f96388      -755407992
edx         0x1d            29
ebx         0x26eff4        2551796
esp         0xbffff4b0      0xbffff4b0
ebp         0x15            0x15
esi         0x0                 0
edi         0x0                 0
eip         0x16            0x16
eflags      0x10202         [ IF RF ]
cs          0x73            115
ss          0x7b            123
ds          0x7b            123
es          0x7b            123
fs          0x0                 0
gs          0x33            51
(gdb)
``````

## 18.3 防止缓冲区溢出的方法

``````/RTCs Stack Frame runtime checking
/GZ Enable stack checks (/RTCs)
``````

``````#include <malloc.h>
#include <stdio.h>
void f()
{
char *buf=(char*)alloca (600);
_snprintf (buf, 600, "hi! %d, %d, %d", 1, 2, 3);

puts (buf);
};
``````

Listing 18.3: GCC 4.7.3

``````.LC0:
.string "hi! %d, %d, %d
"
f:
push    ebp
mov     ebp, esp
push    ebx
sub     esp, 676
lea     ebx, [esp+39]
and     ebx, -16
mov     DWORD PTR [esp+20], 3
mov     DWORD PTR [esp+16], 2
mov     DWORD PTR [esp+12], 1
mov     DWORD PTR [esp+8], OFFSET FLAT:.LC0     ; "hi! %d, %d, %d
"
mov     DWORD PTR [esp+4], 600
mov     DWORD PTR [esp], ebx
mov     eax, DWORD PTR gs:20                    ; canary
mov     DWORD PTR [ebp-12], eax
xor     eax, eax
call    _snprintf
mov     DWORD PTR [esp], ebx
call    puts
mov     eax, DWORD PTR [ebp-12]
xor     eax, DWORD PTR gs:20                    ; canary
jne     .L5
mov     ebx, DWORD PTR [ebp-4]
leave
ret
.L5:
call __stack_chk_fail
``````

``````*** buffer overflow detected ***: ./2_1 terminated
======= Backtrace: =========
/lib/i386-linux-gnu/libc.so.6(__fortify_fail+0x63)[0xb7699bc3]
/lib/i386-linux-gnu/libc.so.6(+0x10593a)[0xb769893a]
/lib/i386-linux-gnu/libc.so.6(+0x105008)[0xb7698008]
/lib/i386-linux-gnu/libc.so.6(_IO_default_xsputn+0x8c)[0xb7606e5c]
/lib/i386-linux-gnu/libc.so.6(_IO_vfprintf+0x165)[0xb75d7a45]
/lib/i386-linux-gnu/libc.so.6(__vsprintf_chk+0xc9)[0xb76980d9]
/lib/i386-linux-gnu/libc.so.6(__sprintf_chk+0x2f)[0xb7697fef]
./2_1[0x8048404]
/lib/i386-linux-gnu/libc.so.6(__libc_start_main+0xf5)[0xb75ac935]
======= Memory map: ========
08048000-08049000 r-xp 00000000 08:01 2097586 /home/dennis/2_1
08049000-0804a000 r--p 00000000 08:01 2097586 /home/dennis/2_1
0804a000-0804b000 rw-p 00001000 08:01 2097586 /home/dennis/2_1
094d1000-094f2000 rw-p 00000000 00:00 0 [heap]
b7560000-b757b000 r-xp 00000000 08:01 1048602 /lib/i386-linux-gnu/libgcc_s.so.1
b757b000-b757c000 r--p 0001a000 08:01 1048602 /lib/i386-linux-gnu/libgcc_s.so.1
b757c000-b757d000 rw-p 0001b000 08:01 1048602 /lib/i386-linux-gnu/libgcc_s.so.1
b7592000-b7593000 rw-p 00000000 00:00 0
b7593000-b7740000 r-xp 00000000 08:01 1050781 /lib/i386-linux-gnu/libc-2.17.so
b7740000-b7742000 r--p 001ad000 08:01 1050781 /lib/i386-linux-gnu/libc-2.17.so
b7742000-b7743000 rw-p 001af000 08:01 1050781 /lib/i386-linux-gnu/libc-2.17.so
b7743000-b7746000 rw-p 00000000 00:00 0
b775a000-b775d000 rw-p 00000000 00:00 0
b775d000-b775e000 r-xp 00000000 00:00 0 [vdso]
b775e000-b777e000 r-xp 00000000 08:01 1050794 /lib/i386-linux-gnu/ld-2.17.so
b777e000-b777f000 r--p 0001f000 08:01 1050794 /lib/i386-linux-gnu/ld-2.17.so
b777f000-b7780000 rw-p 00020000 08:01 1050794 /lib/i386-linux-gnu/ld-2.17.so
bff35000-bff56000 rw-p 00000000 00:00 0 [stack]
Aborted (core dumped)
``````

gs被叫做段寄存器，这些寄存器被广泛用在MS-DOS和扩展DOS时代。现在的作用和以前不同。简要的说，gs寄存器在linux下一直指向TLS（48）--存储线程的各种信息（win32环境下，fs寄存器同样的作用，指向TIB8 9）。 更多信息请参考linux源码arch/x86/include/asm/stackprotector.h（至少3.11版本）。

### 18.3.1 Optimizing Xcode (LLVM) + thumb-2 mode

``````_main
var_64      = -0x64
var_60      = -0x60
var_5C      = -0x5C
var_58      = -0x58
var_54      = -0x54
var_50      = -0x50
var_4C      = -0x4C
var_48      = -0x48
var_44      = -0x44
var_40      = -0x40
var_3C      = -0x3C
var_38      = -0x38
var_34      = -0x34
var_30      = -0x30
var_2C      = -0x2C
var_28      = -0x28
var_24      = -0x24
var_20      = -0x20
var_1C      = -0x1C
var_18      = -0x18
canary      = -0x14
var_10      = -0x10

PUSH    {R4-R7,LR}
STR.W   R8, [SP,#0xC+var_10]!
SUB     SP, SP, #0x54
MOVW    R0, #aObjc_methtype ; "objc_methtype"
MOVS    R2, #0
MOVT.W  R0, #0
MOVS    R5, #0
LDR.W   R8, [R0]
LDR.W   R0, [R8]
STR     R0, [SP,#0x64+canary]
MOVS    R0, #2
STR     R2, [SP,#0x64+var_64]
STR     R0, [SP,#0x64+var_60]
MOVS    R0, #4
STR     R0, [SP,#0x64+var_5C]
MOVS    R0, #6
STR     R0, [SP,#0x64+var_58]
MOVS    R0, #8
STR     R0, [SP,#0x64+var_54]
MOVS    R0, #0xA
STR     R0, [SP,#0x64+var_50]
MOVS    R0, #0xC
STR     R0, [SP,#0x64+var_4C]
MOVS    R0, #0xE
STR     R0, [SP,#0x64+var_48]
MOVS    R0, #0x10
STR     R0, [SP,#0x64+var_44]
MOVS    R0, #0x12
STR     R0, [SP,#0x64+var_40]
MOVS    R0, #0x14
STR     R0, [SP,#0x64+var_3C]
MOVS    R0, #0x16
STR     R0, [SP,#0x64+var_38]
MOVS    R0, #0x18
STR     R0, [SP,#0x64+var_34]
MOVS    R0, #0x1A
STR     R0, [SP,#0x64+var_30]
MOVS    R0, #0x1C
STR     R0, [SP,#0x64+var_2C]
MOVS    R0, #0x1E
STR     R0, [SP,#0x64+var_28]
MOVS    R0, #0x20
STR     R0, [SP,#0x64+var_24]
MOVS    R0, #0x22
STR     R0, [SP,#0x64+var_20]
MOVS    R0, #0x24
STR     R0, [SP,#0x64+var_1C]
MOVS    R0, #0x26
STR     R0, [SP,#0x64+var_18]
MOV     R4, 0xFDA ; "a[%d]=%d
"
MOV     R0, SP
B loc_2F1C
; second loop begin

loc_2F14
LDR.W   R2, [R6,R5,LSL#2]
MOV     R5, R0
loc_2F1C
MOV     R0, R4
MOV     R1, R5
BLX     _printf
CMP     R5, #0x13
BNE     loc_2F14
LDR.W   R0, [R8]
LDR     R1, [SP,#0x64+canary]
CMP     R0, R1
ITTTT   EQ              ; canary still correct?
MOVEQ   R0, #0
LDREQ.W R8, [SP+0x64+var_64],#4
POPEQ   {R4-R7,PC}
BLX     ___stack_chk_fail
``````

### 18.4 One more word about arrays

``````void f(int size)
{
int a[size];
...
};
``````

## 18.6 多维数组

``````[0][0]
[0][1]
[0][2]
[0][3]
[1][0]
[1][4]
[1][5]
[1][6]
[2][0]
[2][7]
[2][8]
[2][9]
``````

1 2 3
4 5 6 7
8 9 10 11

### 18.6.3 多维数组

Listing 18.4: simple example

``````#include <stdio.h>

int a[10][20][30];

void insert(int x, int y, int z, int value)
{
a[x][y][z]=value;
};
``````

#### x86

MSVC 2010：

Listing 18.5: MSVC 2010

``````_DATA   SEGMENT
COMM    _a:DWORD:01770H
_DATA   ENDS
PUBLIC  _insert
_TEXT   SEGMENT
_x\$ = 8         ; size = 4
_y\$ = 12        ; size = 4
_z\$ = 16        ; size = 4
_value\$ = 20    ; size = 4
_insert     PROC
push    ebp
mov     ebp, esp
mov     eax, DWORD PTR _x\$[ebp]
imul    eax, 2400                   ; eax=600*4*x
mov     ecx, DWORD PTR _y\$[ebp]
imul    ecx, 120                    ; ecx=30*4*y
lea     edx, DWORD PTR _a[eax+ecx]  ; edx=a + 600*4*x + 30*4*y
mov     eax, DWORD PTR _z\$[ebp]
mov     ecx, DWORD PTR _value\$[ebp]
mov     DWORD PTR [edx+eax*4], ecx  ; *(edx+z*4)=value
pop     ebp
ret     0
_insert ENDP
_TEXT ENDS
``````

Listing 18.6: GCC 4.4.1

``````        public insert
insert  proc near
x       = dword ptr 8
y       = dword ptr 0Ch
z       = dword ptr 10h
value   = dword ptr 14h
push    ebp
mov     ebp, esp
push    ebx
mov     ebx, [ebp+x]
mov     eax, [ebp+y]
mov     ecx, [ebp+z]
lea     edx, [eax+eax] ; edx=y*2
mov     eax, edx ; eax=y*2
shl     eax, 4 ; eax=(y*2)<<4 = y*2*16 = y*32
sub     eax, edx ; eax=y*32 - y*2=y*30
imul    edx, ebx, 600 ; edx=x*600
add     eax, edx ; eax=eax+edx=y*30 + x*600
lea     edx, [eax+ecx] ; edx=y*30 + x*600 + z
mov     eax, [ebp+value]
mov     dword ptr ds:a[edx*4], eax ; *(a+edx*4)=value
pop     ebx
pop     ebp
retn
insert  endp
``````

GCC使用的不同的计算方法。为计算第一个操作值30y，GCC没有使用乘法指令。GCC的做法是：(???? + ????) ≪ 4 − (???? + ????) = (2????) ≪ 4 − 2???? = 2 ・ 16 ・ ???? − 2???? = 32???? − 2???? = 30????。因此30y的计算仅使用加法和移位操作，这样速度更快。

#### ARM + Non-optimizing Xcode (LLVM) + thumb mode

Listing 18.7: Non-optimizing Xcode (LLVM) + thumb mode

``````_insert

value       = -0x10
z           = -0xC
y           = -8
x           = -4

; allocate place in local stack for 4 values of int type
SUB         SP, SP, #0x10
MOV         R9, 0xFC2 ; a
LDR.W       R9, [R9]
STR         R0, [SP,#0x10+x]
STR         R1, [SP,#0x10+y]
STR         R2, [SP,#0x10+z]
STR         R3, [SP,#0x10+value]
LDR         R0, [SP,#0x10+value]
LDR         R1, [SP,#0x10+z]
LDR         R2, [SP,#0x10+y]
LDR         R3, [SP,#0x10+x]
MOV         R12, 2400
MUL.W       R3, R3, R12
MOV         R9, 120
MUL.W       R2, R2, R9
LSLS        R1, R1, #2 ; R1=R1<<2
STR         R0, [R1] ; R1 - address of array element
; deallocate place in local stack, allocated for 4 values of int type
BX          LR
``````

#### ARM + Optimizing Xcode (LLVM) + thumb mode

Listing 18.8: Optimizing Xcode (LLVM) + thumb mode

``````_insert
MOVW        R9, #0x10FC
MOV.W       R12, #2400
MOVT.W      R9, #0
RSB.W       R1, R1, R1,LSL#4    ; R1 - y. R1=y<<4 - y = y*16 - y = y*15
ADD         R9, PC ; R9 = pointer to a array
LDR.W       R9, [R9]
MLA.W       R0, R0, R12, R9     ; R0 - x, R12 - 2400, R9 - pointer to a. R0=x*2400 + ptr to a
ADD.W       R0, R0, R1,LSL#3    ; R0 = R0+R1<<3 = R0+R1*8 = x*2400 + ptr to a + y*15*8 =
; ptr to a + y*30*4 + x*600*4
STR.W       R3, [R0,R2,LSL#2]   ; R2 - z, R3 - value. address=R0+z*4 =
; ptr to a + y*30*4 + x*600*4 + z*4
BX          LR
``````