(possible answer at end after disassembly)
Sam Kong said:
Hello!
To measure some performance, I made the following code.
using System;
class Speed {
public static void Main() {
long start_time = DateTime.Now.Ticks;
double d = 0;
for (int i = 0; i < 1000000000; i++) {
d = d + i;
}
Console.WriteLine(d);
long end_time = DateTime.Now.Ticks;
TimeSpan time = new TimeSpan(end_time - start_time);
Console.WriteLine(time);
}
}
00000000 push ebp
00000001 mov ebp,esp
00000003 sub esp,14h
00000006 push edi
00000007 push esi
00000008 xor eax,eax
0000000a mov dword ptr [ebp-8],eax
0000000d mov dword ptr [ebp-4],eax
00000010 mov dword ptr [ebp-10h],eax
00000013 mov dword ptr [ebp-0Ch],eax
00000016 lea ecx,[ebp-10h]
00000019 call dword ptr ds:[79BA6C9Ch]
0000001f mov esi,dword ptr [ebp-10h]
00000022 mov edi,dword ptr [ebp-0Ch]
00000025 fldz
00000027 xor eax,eax
00000029 mov dword ptr [ebp-14h],eax
0000002c fild dword ptr [ebp-14h]
0000002f faddp st(1),st
00000031 inc eax
00000032 cmp eax,3B9ACA00h
00000037 jl 00000029
00000039 sub esp,8
0000003c fstp qword ptr [esp]
0000003f mov ecx,dword ptr ds:[05A4202Ch]
00000045 mov eax,dword ptr [ecx]
00000047 call dword ptr [eax+000000D0h]
0000004d lea ecx,[ebp-10h]
00000050 call dword ptr ds:[79BA6C9Ch]
00000056 mov ecx,dword ptr [ebp-10h]
00000059 mov edx,dword ptr [ebp-0Ch]
0000005c sub ecx,esi
0000005e sbb edx,edi
00000060 mov dword ptr [ebp-8],ecx
00000063 mov dword ptr [ebp-4],edx
00000066 mov ecx,79BB48B8h
0000006b call F97B1FC0
00000070 mov edx,eax
00000072 lea edi,[edx+4]
00000075 cmp ecx,dword ptr [edi]
00000077 lea esi,[ebp-8]
0000007a movs dword ptr [edi],dword ptr [esi]
0000007b movs dword ptr [edi],dword ptr [esi]
0000007c mov ecx,dword ptr ds:[05A4202Ch]
00000082 mov eax,dword ptr [ecx]
00000084 call dword ptr [eax+000000DCh]
0000008a call dword ptr ds:[79BB931Ch]
00000090 pop esi
00000091 pop edi
00000092 mov esp,ebp
00000094 pop ebp
00000095 ret
It takes about 3 seconds.
If I change the order of some lines, the result is very different.
using System;
class Speed {
public static void Main() {
long start_time = DateTime.Now.Ticks;
double d = 0;
for (int i = 0; i < 1000000000; i++) {
d = d + i;
}
long end_time = DateTime.Now.Ticks;
TimeSpan time = new TimeSpan(end_time - start_time);
Console.WriteLine(d);
Console.WriteLine(time);
}
}
00000000 push ebp
00000001 mov ebp,esp
00000003 and esp,0FFFFFFF8h
00000006 sub esp,20h
00000009 push edi
0000000a push esi
0000000b xor eax,eax
0000000d mov dword ptr [esp+10h],eax
00000011 mov dword ptr [esp+14h],eax
00000015 mov dword ptr [esp+18h],eax
00000019 mov dword ptr [esp+1Ch],eax
0000001d lea ecx,[esp+18h]
00000021 call dword ptr ds:[79BA6C9Ch]
00000027 mov esi,dword ptr [esp+18h]
0000002b mov edi,dword ptr [esp+1Ch]
0000002f fldz
00000031 fstp qword ptr [esp+8]
00000035 xor eax,eax
00000037 mov dword ptr [esp+20h],eax
0000003b fild dword ptr [esp+20h]
0000003f fadd qword ptr [esp+8]
00000043 fstp qword ptr [esp+8]
00000047 inc eax
00000048 cmp eax,3B9ACA00h
0000004d jl 00000037
0000004f lea ecx,[esp+18h]
00000053 call dword ptr ds:[79BA6C9Ch]
00000059 mov ecx,dword ptr [esp+18h]
0000005d mov edx,dword ptr [esp+1Ch]
00000061 sub ecx,esi
00000063 sbb edx,edi
00000065 mov dword ptr [esp+10h],ecx
00000069 mov dword ptr [esp+14h],edx
0000006d push dword ptr [esp+0Ch]
00000071 push dword ptr [esp+0Ch]
00000075 mov ecx,dword ptr ds:[05A42038h]
0000007b mov eax,dword ptr [ecx]
0000007d call dword ptr [eax+000000D0h]
00000083 mov ecx,79BB48B8h
00000088 call F97B1FC0
0000008d mov edx,eax
0000008f lea edi,[edx+4]
00000092 cmp ecx,dword ptr [edi]
00000094 lea esi,[esp+10h]
00000098 movs dword ptr [edi],dword ptr [esi]
00000099 movs dword ptr [edi],dword ptr [esi]
0000009a mov ecx,dword ptr ds:[05A42038h]
000000a0 mov eax,dword ptr [ecx]
000000a2 call dword ptr [eax+000000DCh]
000000a8 call dword ptr ds:[79BB931Ch]
000000ae pop esi
000000af pop edi
000000b0 mov esp,ebp
000000b2 pop ebp
000000b3 ret
It takes about 11 seconds.
Is it only me or general?
I don't understand why it makes such a big difference.
I'm guessing (and this is only a guess; never been any good with X86 but
I've included it above to save others time) that in the first example d can
be held in a register, whereas in the second it may be spilled by the
intervening function calls and so must be stored on the stack.
However I'd be interested in a non-guessed answer.
Stu