My good colleague, Mike.McCarty, put out code for review and stated that the initialization of function-local statics is thread-safe. On the face of it that sounded very scary, but I trust Mike, so I started exploring what the compiler actually does.
Here is the standards paper on the subject (N2660, "Dynamic Initialization and Destruction with Concurrency"): http://www.open-std.org/jtc1/sc22/wg21/docs/papers/2008/n2660.htm
First, I wrote some code and dumped its disassembly in VS2015:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
| static int function()
{
00007FF7610C1910 push rbp
00007FF7610C1912 push rdi
00007FF7610C1913 sub rsp,108h
00007FF7610C191A lea rbp,[rsp+20h]
00007FF7610C191F mov rdi,rsp
00007FF7610C1922 mov ecx,42h
00007FF7610C1927 mov eax,0CCCCCCCCh
00007FF7610C192C rep stos dword ptr [rdi]
00007FF7610C192E mov qword ptr [rbp+0C8h],0FFFFFFFFFFFFFFFEh
static int s_value = function2();
00007FF7610C1939 mov eax,104h
00007FF7610C193E mov eax,eax
00007FF7610C1940 mov ecx,dword ptr [_tls_index (07FF7610CC1C8h)]
00007FF7610C1946 mov rdx,qword ptr gs:[58h]
00007FF7610C194F mov rcx,qword ptr [rdx+rcx*8]
00007FF7610C1953 mov eax,dword ptr [rax+rcx]
00007FF7610C1956 cmp dword ptr [s_value+4h (07FF7610CC164h)],eax
00007FF7610C195C jle function+7Ah (07FF7610C198Ah)
00007FF7610C195E lea rcx,[s_value+4h (07FF7610CC164h)]
00007FF7610C1965 call _Init_thread_header (07FF7610C101Eh)
00007FF7610C196A cmp dword ptr [s_value+4h (07FF7610CC164h)],0FFFFFFFFh
00007FF7610C1971 jne function+7Ah (07FF7610C198Ah)
00007FF7610C1973 call function2 (07FF7610C18D0h)
00007FF7610C1978 mov dword ptr [s_value (07FF7610CC160h)],eax
00007FF7610C197E lea rcx,[s_value+4h (07FF7610CC164h)]
00007FF7610C1985 call _Init_thread_footer (07FF7610C1073h)
return s_value;
00007FF7610C198A mov eax,dword ptr [s_value (07FF7610CC160h)]
}
|
Looking at this, Mike asked: why is TLS even involved?
1
2
3
4
5
6
| 00007FF7610C1939 mov eax,104h
00007FF7610C193E mov eax,eax
00007FF7610C1940 mov ecx,dword ptr [_tls_index (07FF7610CC1C8h)]
00007FF7610C1946 mov rdx,qword ptr gs:[58h]
00007FF7610C194F mov rcx,qword ptr [rdx+rcx*8]
00007FF7610C1953 mov eax,dword ptr [rax+rcx]
|
Well, my guess is that it is trying to reduce contention among threads initializing the same static variable.
If you call the function a second time, it still goes through this code, because it does not know whether this is the first call or not.
1
2
3
4
5
6
7
8
9
| static int s_value = function2();
00007FF7610C1939 mov eax,104h
00007FF7610C193E mov eax,eax
00007FF7610C1940 mov ecx,dword ptr [_tls_index (07FF7610CC1C8h)]
00007FF7610C1946 mov rdx,qword ptr gs:[58h]
00007FF7610C194F mov rcx,qword ptr [rdx+rcx*8]
00007FF7610C1953 mov eax,dword ptr [rax+rcx]
00007FF7610C1956 cmp dword ptr [s_value+4h (07FF7610CC164h)],eax
00007FF7610C195C jle function+7Ah (07FF7610C198Ah)
|
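When multiple threads call the same function, there is a possibility of contention.
This is where TLS helps (a form of the double-checked locking pattern, DCLP, I guess).
Two records are maintained and updated:
- Thread-local: the value read from the thread's TLS block at offset 104h, used to reduce contention and implement the DCLP-style fast path
- Global: the guard stored at s_value + 4h
The fast path compares the two: if the global guard is less than or equal to the thread-local value, the `jle` skips initialization and jumps straight to the `return`.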
The same thing can be seen, done a bit differently, on Linux (https://godbolt.org/g/GfKMQc):
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
| function2(): # @function2()
push rbp
mov rbp, rsp
sub rsp, 16
cmp byte ptr [guard variable for function2()::val], 0
jne .LBB1_4
movabs rdi, guard variable for function2()::val
call __cxa_guard_acquire
cmp eax, 0
je .LBB1_4
call function()
mov dword ptr [rbp - 16], eax # 4-byte Spill
jmp .LBB1_3
.LBB1_3:
movabs rdi, guard variable for function2()::val
mov eax, dword ptr [rbp - 16] # 4-byte Reload
mov dword ptr [function2()::val], eax
call __cxa_guard_release
.LBB1_4:
add rsp, 16
pop rbp
ret
movabs rdi, guard variable for function2()::val
mov ecx, edx
mov qword ptr [rbp - 8], rax
mov dword ptr [rbp - 12], ecx
call __cxa_guard_abort
mov rdi, qword ptr [rbp - 8]
call _Unwind_Resume
|
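Here __cxa_guard_acquire, __cxa_guard_release and __cxa_guard_abort do the magic. In the listing above, the initializer runs only when __cxa_guard_acquire returns non-zero; __cxa_guard_release is called once the value has been stored, and __cxa_guard_abort is called on the exception path just before _Unwind_Resume.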