/** * The atomic module provides basic support for lock-free * concurrent programming. * * Copyright: Copyright Sean Kelly 2005 - 2016. * License: $(LINK2 http://www.boost.org/LICENSE_1_0.txt, Boost License 1.0) * Authors: Sean Kelly, Alex Rønne Petersen * Source: $(DRUNTIMESRC core/_atomic.d) */ /* NOTE: This file has been patched from the original DMD distribution to * work with the GDC compiler. */ module core.atomic; version (D_InlineAsm_X86) { version = AsmX86; version = AsmX86_32; enum has64BitCAS = true; enum has128BitCAS = false; } else version (D_InlineAsm_X86_64) { version = AsmX86; version = AsmX86_64; enum has64BitCAS = true; enum has128BitCAS = true; } else version (GNU) { import gcc.config; enum has64BitCAS = GNU_Have_64Bit_Atomics; enum has128BitCAS = GNU_Have_LibAtomic; } else { enum has64BitCAS = false; enum has128BitCAS = false; } private { template HeadUnshared(T) { static if ( is( T U : shared(U*) ) ) alias shared(U)* HeadUnshared; else alias T HeadUnshared; } } version (AsmX86) { // NOTE: Strictly speaking, the x86 supports atomic operations on // unaligned values. However, this is far slower than the // common case, so such behavior should be prohibited. private bool atomicValueIsProperlyAligned(T)( ref T val ) pure nothrow @nogc @trusted { return atomicPtrIsProperlyAligned(&val); } private bool atomicPtrIsProperlyAligned(T)( T* ptr ) pure nothrow @nogc @safe { // NOTE: 32 bit x86 systems support 8 byte CAS, which only requires // 4 byte alignment, so use size_t as the align type here. static if ( T.sizeof > size_t.sizeof ) return cast(size_t)ptr % size_t.sizeof == 0; else return cast(size_t)ptr % T.sizeof == 0; } } version (CoreDdoc) { /** * Performs the binary operation 'op' on val using 'mod' as the modifier. * * Params: * val = The target variable. * mod = The modifier to apply. * * Returns: * The result of the operation. */ HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc @safe if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) { return HeadUnshared!(T).init; } /** * Stores 'writeThis' to the memory referenced by 'here' if the value * referenced by 'here' is equal to 'ifThis'. This operation is both * lock-free and atomic. * * Params: * here = The address of the destination variable. * writeThis = The value to store. * ifThis = The comparison value. * * Returns: * true if the store occurred, false if not. */ bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ); /// Ditto bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ); /// Ditto bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ); /** * Loads 'val' from memory and returns it. The memory barrier specified * by 'ms' is applied to the operation, which is fully sequenced by * default. Valid memory orders are MemoryOrder.raw, MemoryOrder.acq, * and MemoryOrder.seq. * * Params: * val = The target variable. * * Returns: * The value of 'val'. */ HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq,T)( ref const shared T val ) pure nothrow @nogc @safe { return HeadUnshared!(T).init; } /** * Writes 'newval' into 'val'. 
The memory barrier specified by 'ms' is * applied to the operation, which is fully sequenced by default. * Valid memory orders are MemoryOrder.raw, MemoryOrder.rel, and * MemoryOrder.seq. * * Params: * val = The target variable. * newval = The value to store. */ void atomicStore(MemoryOrder ms = MemoryOrder.seq,T,V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe if ( __traits( compiles, { val = newval; } ) ) { } /** * Specifies the memory ordering semantics of an atomic operation. */ enum MemoryOrder { raw, /// Not sequenced. acq, /// Hoist-load + hoist-store barrier. rel, /// Sink-load + sink-store barrier. seq, /// Fully sequenced (acquire + release). } deprecated("Please use MemoryOrder instead.") alias MemoryOrder msync; /** * Inserts a full load/store memory fence (on platforms that need it). This ensures * that all loads and stores before a call to this function are executed before any * loads and stores after the call. */ void atomicFence() nothrow @nogc; } else version (AsmX86_32) { // Uses specialized asm for fast fetch and add operations private HeadUnshared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe if ( T.sizeof <= 4 ) { size_t tmp = mod; asm pure nothrow @nogc @trusted { mov EAX, tmp; mov EDX, val; } static if (T.sizeof == 1) asm pure nothrow @nogc @trusted { lock; xadd[EDX], AL; } else static if (T.sizeof == 2) asm pure nothrow @nogc @trusted { lock; xadd[EDX], AX; } else static if (T.sizeof == 4) asm pure nothrow @nogc @trusted { lock; xadd[EDX], EAX; } asm pure nothrow @nogc @trusted { mov tmp, EAX; } return cast(T)tmp; } private HeadUnshared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe if ( T.sizeof <= 4) { return atomicFetchAdd(val, -mod); } HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) in { assert(atomicValueIsProperlyAligned(val)); } body { // binary operators // // + - * / % ^^ & // | ^ << >> >>> ~ in // == != < <= > >= static if ( op == "+" || op == "-" || op == "*" || op == "/" || op == "%" || op == "^^" || op == "&" || op == "|" || op == "^" || op == "<<" || op == ">>" || op == ">>>" || op == "~" || // skip "in" op == "==" || op == "!=" || op == "<" || op == "<=" || op == ">" || op == ">=" ) { HeadUnshared!(T) get = atomicLoad!(MemoryOrder.raw)( val ); mixin( "return get " ~ op ~ " mod;" ); } else // assignment operators // // += -= *= /= %= ^^= &= // |= ^= <<= >>= >>>= ~= static if ( op == "+=" && __traits(isIntegral, T) && T.sizeof <= 4 && V1.sizeof <= 4) { return cast(T)(atomicFetchAdd!(T)(val, mod) + mod); } else static if ( op == "-=" && __traits(isIntegral, T) && T.sizeof <= 4 && V1.sizeof <= 4) { return cast(T)(atomicFetchSub!(T)(val, mod) - mod); } else static if ( op == "+=" || op == "-=" || op == "*=" || op == "/=" || op == "%=" || op == "^^=" || op == "&=" || op == "|=" || op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" { HeadUnshared!(T) get, set; do { get = set = atomicLoad!(MemoryOrder.raw)( val ); mixin( "set " ~ op ~ " mod;" ); } while ( !casByRef( val, get, set ) ); return set; } else { static assert( false, "Operation not supported." 
); } } bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted { return cas(&value, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } private bool casImpl(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe in { assert( atomicPtrIsProperlyAligned( here ) ); } body { static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// // 1 Byte CAS ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { mov DL, writeThis; mov AL, ifThis; mov ECX, here; lock; // lock always needed to make this op atomic cmpxchg [ECX], DL; setz AL; } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// // 2 Byte CAS ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { mov DX, writeThis; mov AX, ifThis; mov ECX, here; lock; // lock always needed to make this op atomic cmpxchg [ECX], DX; setz AL; } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// // 4 Byte CAS ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { mov EDX, writeThis; mov EAX, ifThis; mov ECX, here; lock; // lock always needed to make this op atomic cmpxchg [ECX], EDX; setz AL; } } else static if ( T.sizeof == long.sizeof && has64BitCAS ) { ////////////////////////////////////////////////////////////////// // 8 Byte CAS on a 32-Bit Processor ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { push EDI; push EBX; lea EDI, writeThis; mov EBX, [EDI]; mov ECX, 4[EDI]; lea EDI, ifThis; mov EAX, [EDI]; mov EDX, 4[EDI]; mov EDI, here; lock; // lock always needed to make this op atomic cmpxchg8b [EDI]; setz AL; pop EBX; pop EDI; } } else { static assert( false, "Invalid template type specified." ); } } enum MemoryOrder { raw, acq, rel, seq, } deprecated("Please use MemoryOrder instead.") alias MemoryOrder msync; private { // NOTE: x86 loads implicitly have acquire semantics so a memory // barrier is only necessary on releases. template needsLoadBarrier( MemoryOrder ms ) { enum bool needsLoadBarrier = ms == MemoryOrder.seq; } // NOTE: x86 stores implicitly have release semantics so a memory // barrier is only necessary on acquires. 
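        // Why only MemoryOrder.seq pays for a barrier: under x86's TSO model
        // the hardware only reorders a store with a later load, so raw/acq
        // loads and raw/rel stores are already ordered correctly.  The seq
        // paths below obtain the full barrier from a LOCKed CMPXCHG (loads)
        // and an XCHG (stores), which locks implicitly.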
template needsStoreBarrier( MemoryOrder ms ) { enum bool needsStoreBarrier = ms == MemoryOrder.seq; } } HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe if (!__traits(isFloating, T)) { static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" ); static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" ); static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// // 1 Byte Load ////////////////////////////////////////////////////////////////// static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov DL, 0; mov AL, 0; mov ECX, val; lock; // lock always needed to make this op atomic cmpxchg [ECX], DL; } } else { asm pure nothrow @nogc @trusted { mov EAX, val; mov AL, [EAX]; } } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// // 2 Byte Load ////////////////////////////////////////////////////////////////// static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov DX, 0; mov AX, 0; mov ECX, val; lock; // lock always needed to make this op atomic cmpxchg [ECX], DX; } } else { asm pure nothrow @nogc @trusted { mov EAX, val; mov AX, [EAX]; } } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// // 4 Byte Load ////////////////////////////////////////////////////////////////// static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov EDX, 0; mov EAX, 0; mov ECX, val; lock; // lock always needed to make this op atomic cmpxchg [ECX], EDX; } } else { asm pure nothrow @nogc @trusted { mov EAX, val; mov EAX, [EAX]; } } } else static if ( T.sizeof == long.sizeof && has64BitCAS ) { ////////////////////////////////////////////////////////////////// // 8 Byte Load on a 32-Bit Processor ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { push EDI; push EBX; mov EBX, 0; mov ECX, 0; mov EAX, 0; mov EDX, 0; mov EDI, val; lock; // lock always needed to make this op atomic cmpxchg8b [EDI]; pop EBX; pop EDI; } } else { static assert( false, "Invalid template type specified." 
); } } void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe if ( __traits( compiles, { val = newval; } ) ) { static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" ); static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" ); static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// // 1 Byte Store ////////////////////////////////////////////////////////////////// static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov EAX, val; mov DL, newval; lock; xchg [EAX], DL; } } else { asm pure nothrow @nogc @trusted { mov EAX, val; mov DL, newval; mov [EAX], DL; } } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// // 2 Byte Store ////////////////////////////////////////////////////////////////// static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov EAX, val; mov DX, newval; lock; xchg [EAX], DX; } } else { asm pure nothrow @nogc @trusted { mov EAX, val; mov DX, newval; mov [EAX], DX; } } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// // 4 Byte Store ////////////////////////////////////////////////////////////////// static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov EAX, val; mov EDX, newval; lock; xchg [EAX], EDX; } } else { asm pure nothrow @nogc @trusted { mov EAX, val; mov EDX, newval; mov [EAX], EDX; } } } else static if ( T.sizeof == long.sizeof && has64BitCAS ) { ////////////////////////////////////////////////////////////////// // 8 Byte Store on a 32-Bit Processor ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { push EDI; push EBX; lea EDI, newval; mov EBX, [EDI]; mov ECX, 4[EDI]; mov EDI, val; mov EAX, [EDI]; mov EDX, 4[EDI]; L1: lock; // lock always needed to make this op atomic cmpxchg8b [EDI]; jne L1; pop EBX; pop EDI; } } else { static assert( false, "Invalid template type specified." ); } } void atomicFence() nothrow @nogc @safe { import core.cpuid; asm pure nothrow @nogc @trusted { naked; call sse2; test AL, AL; jne Lcpuid; // Fast path: We have SSE2, so just use mfence. mfence; jmp Lend; Lcpuid: // Slow path: We use cpuid to serialize. This is // significantly slower than mfence, but is the // only serialization facility we have available // on older non-SSE2 chips. 
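            // CPUID (leaf 0) clobbers EAX, EBX, ECX and EDX; EBX is
            // callee-saved (and the PIC base register on 32-bit ELF), which is
            // why it is pushed and popped around the instruction below.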
push EBX; mov EAX, 0; cpuid; pop EBX; Lend: ret; } } } else version (AsmX86_64) { // Uses specialized asm for fast fetch and add operations private HeadUnshared!(T) atomicFetchAdd(T)( ref shared T val, size_t mod ) pure nothrow @nogc @trusted if ( __traits(isIntegral, T) ) in { assert( atomicValueIsProperlyAligned(val)); } body { size_t tmp = mod; asm pure nothrow @nogc @trusted { mov RAX, tmp; mov RDX, val; } static if (T.sizeof == 1) asm pure nothrow @nogc @trusted { lock; xadd[RDX], AL; } else static if (T.sizeof == 2) asm pure nothrow @nogc @trusted { lock; xadd[RDX], AX; } else static if (T.sizeof == 4) asm pure nothrow @nogc @trusted { lock; xadd[RDX], EAX; } else static if (T.sizeof == 8) asm pure nothrow @nogc @trusted { lock; xadd[RDX], RAX; } asm pure nothrow @nogc @trusted { mov tmp, RAX; } return cast(T)tmp; } private HeadUnshared!(T) atomicFetchSub(T)( ref shared T val, size_t mod ) pure nothrow @nogc @safe if ( __traits(isIntegral, T) ) { return atomicFetchAdd(val, -mod); } HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) in { assert( atomicValueIsProperlyAligned(val)); } body { // binary operators // // + - * / % ^^ & // | ^ << >> >>> ~ in // == != < <= > >= static if ( op == "+" || op == "-" || op == "*" || op == "/" || op == "%" || op == "^^" || op == "&" || op == "|" || op == "^" || op == "<<" || op == ">>" || op == ">>>" || op == "~" || // skip "in" op == "==" || op == "!=" || op == "<" || op == "<=" || op == ">" || op == ">=" ) { HeadUnshared!(T) get = atomicLoad!(MemoryOrder.raw)( val ); mixin( "return get " ~ op ~ " mod;" ); } else // assignment operators // // += -= *= /= %= ^^= &= // |= ^= <<= >>= >>>= ~= static if ( op == "+=" && __traits(isIntegral, T) && __traits(isIntegral, V1)) { return cast(T)(atomicFetchAdd!(T)(val, mod) + mod); } else static if ( op == "-=" && __traits(isIntegral, T) && __traits(isIntegral, V1)) { return cast(T)(atomicFetchSub!(T)(val, mod) - mod); } else static if ( op == "+=" || op == "-=" || op == "*=" || op == "/=" || op == "%=" || op == "^^=" || op == "&=" || op == "|=" || op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" { HeadUnshared!(T) get, set; do { get = set = atomicLoad!(MemoryOrder.raw)( val ); mixin( "set " ~ op ~ " mod;" ); } while ( !casByRef( val, get, set ) ); return set; } else { static assert( false, "Operation not supported." 
); } } bool casByRef(T,V1,V2)( ref T value, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted { return cas(&value, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } private bool casImpl(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe in { assert( atomicPtrIsProperlyAligned( here ) ); } body { static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// // 1 Byte CAS ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { mov DL, writeThis; mov AL, ifThis; mov RCX, here; lock; // lock always needed to make this op atomic cmpxchg [RCX], DL; setz AL; } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// // 2 Byte CAS ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { mov DX, writeThis; mov AX, ifThis; mov RCX, here; lock; // lock always needed to make this op atomic cmpxchg [RCX], DX; setz AL; } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// // 4 Byte CAS ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { mov EDX, writeThis; mov EAX, ifThis; mov RCX, here; lock; // lock always needed to make this op atomic cmpxchg [RCX], EDX; setz AL; } } else static if ( T.sizeof == long.sizeof ) { ////////////////////////////////////////////////////////////////// // 8 Byte CAS on a 64-Bit Processor ////////////////////////////////////////////////////////////////// asm pure nothrow @nogc @trusted { mov RDX, writeThis; mov RAX, ifThis; mov RCX, here; lock; // lock always needed to make this op atomic cmpxchg [RCX], RDX; setz AL; } } else static if ( T.sizeof == long.sizeof*2 && has128BitCAS) { ////////////////////////////////////////////////////////////////// // 16 Byte CAS on a 64-Bit Processor ////////////////////////////////////////////////////////////////// version (Win64){ //Windows 64 calling convention uses different registers. //DMD appears to reverse the register order. asm pure nothrow @nogc @trusted { push RDI; push RBX; mov R9, writeThis; mov R10, ifThis; mov R11, here; mov RDI, R9; mov RBX, [RDI]; mov RCX, 8[RDI]; mov RDI, R10; mov RAX, [RDI]; mov RDX, 8[RDI]; mov RDI, R11; lock; cmpxchg16b [RDI]; setz AL; pop RBX; pop RDI; } }else{ asm pure nothrow @nogc @trusted { push RDI; push RBX; lea RDI, writeThis; mov RBX, [RDI]; mov RCX, 8[RDI]; lea RDI, ifThis; mov RAX, [RDI]; mov RDX, 8[RDI]; mov RDI, here; lock; // lock always needed to make this op atomic cmpxchg16b [RDI]; setz AL; pop RBX; pop RDI; } } } else { static assert( false, "Invalid template type specified." 
); } } enum MemoryOrder { raw, acq, rel, seq, } deprecated("Please use MemoryOrder instead.") alias MemoryOrder msync; private { // NOTE: x86 loads implicitly have acquire semantics so a memory // barrier is only necessary on releases. template needsLoadBarrier( MemoryOrder ms ) { enum bool needsLoadBarrier = ms == MemoryOrder.seq; } // NOTE: x86 stores implicitly have release semantics so a memory // barrier is only necessary on acquires. template needsStoreBarrier( MemoryOrder ms ) { enum bool needsStoreBarrier = ms == MemoryOrder.seq; } } HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @safe if (!__traits(isFloating, T)) { static assert( ms != MemoryOrder.rel, "invalid MemoryOrder for atomicLoad()" ); static assert( __traits(isPOD, T), "argument to atomicLoad() must be POD" ); static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// // 1 Byte Load ////////////////////////////////////////////////////////////////// static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov DL, 0; mov AL, 0; mov RCX, val; lock; // lock always needed to make this op atomic cmpxchg [RCX], DL; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov AL, [RAX]; } } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// // 2 Byte Load ////////////////////////////////////////////////////////////////// static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov DX, 0; mov AX, 0; mov RCX, val; lock; // lock always needed to make this op atomic cmpxchg [RCX], DX; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov AX, [RAX]; } } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// // 4 Byte Load ////////////////////////////////////////////////////////////////// static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov EDX, 0; mov EAX, 0; mov RCX, val; lock; // lock always needed to make this op atomic cmpxchg [RCX], EDX; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov EAX, [RAX]; } } } else static if ( T.sizeof == long.sizeof ) { ////////////////////////////////////////////////////////////////// // 8 Byte Load ////////////////////////////////////////////////////////////////// static if ( needsLoadBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov RDX, 0; mov RAX, 0; mov RCX, val; lock; // lock always needed to make this op atomic cmpxchg [RCX], RDX; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov RAX, [RAX]; } } } else static if ( T.sizeof == long.sizeof*2 && has128BitCAS ) { ////////////////////////////////////////////////////////////////// // 16 Byte Load on a 64-Bit Processor ////////////////////////////////////////////////////////////////// version (Win64){ size_t[2] retVal; asm pure nothrow @nogc @trusted { push RDI; push RBX; mov RDI, val; mov RBX, 0; mov RCX, 0; mov RAX, 0; mov RDX, 0; lock; // lock always needed to make this op atomic cmpxchg16b [RDI]; lea RDI, retVal; mov [RDI], RAX; mov 8[RDI], RDX; pop RBX; pop RDI; } static if (is(T:U[], U)) { pragma(inline, true) static typeof(return) toTrusted(size_t[2] retVal) @trusted { return *(cast(typeof(return)*) retVal.ptr); } return toTrusted(retVal); } else { return cast(typeof(return)) retVal; } }else{ asm pure nothrow @nogc @trusted { push RDI; push RBX; mov RBX, 0; mov RCX, 0; mov RAX, 0; mov RDX, 0; mov RDI, val; lock; // lock 
always needed to make this op atomic cmpxchg16b [RDI]; pop RBX; pop RDI; } } } else { static assert( false, "Invalid template type specified." ); } } void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @safe if ( __traits( compiles, { val = newval; } ) ) { static assert( ms != MemoryOrder.acq, "invalid MemoryOrder for atomicStore()" ); static assert( __traits(isPOD, T), "argument to atomicStore() must be POD" ); static if ( T.sizeof == byte.sizeof ) { ////////////////////////////////////////////////////////////////// // 1 Byte Store ////////////////////////////////////////////////////////////////// static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov RAX, val; mov DL, newval; lock; xchg [RAX], DL; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov DL, newval; mov [RAX], DL; } } } else static if ( T.sizeof == short.sizeof ) { ////////////////////////////////////////////////////////////////// // 2 Byte Store ////////////////////////////////////////////////////////////////// static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov RAX, val; mov DX, newval; lock; xchg [RAX], DX; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov DX, newval; mov [RAX], DX; } } } else static if ( T.sizeof == int.sizeof ) { ////////////////////////////////////////////////////////////////// // 4 Byte Store ////////////////////////////////////////////////////////////////// static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov RAX, val; mov EDX, newval; lock; xchg [RAX], EDX; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov EDX, newval; mov [RAX], EDX; } } } else static if ( T.sizeof == long.sizeof && has64BitCAS ) { ////////////////////////////////////////////////////////////////// // 8 Byte Store on a 64-Bit Processor ////////////////////////////////////////////////////////////////// static if ( needsStoreBarrier!(ms) ) { asm pure nothrow @nogc @trusted { mov RAX, val; mov RDX, newval; lock; xchg [RAX], RDX; } } else { asm pure nothrow @nogc @trusted { mov RAX, val; mov RDX, newval; mov [RAX], RDX; } } } else static if ( T.sizeof == long.sizeof*2 && has128BitCAS ) { ////////////////////////////////////////////////////////////////// // 16 Byte Store on a 64-Bit Processor ////////////////////////////////////////////////////////////////// version (Win64){ asm pure nothrow @nogc @trusted { push RDI; push RBX; mov R9, val; mov R10, newval; mov RDI, R10; mov RBX, [RDI]; mov RCX, 8[RDI]; mov RDI, R9; mov RAX, [RDI]; mov RDX, 8[RDI]; L1: lock; // lock always needed to make this op atomic cmpxchg16b [RDI]; jne L1; pop RBX; pop RDI; } }else{ asm pure nothrow @nogc @trusted { push RDI; push RBX; lea RDI, newval; mov RBX, [RDI]; mov RCX, 8[RDI]; mov RDI, val; mov RAX, [RDI]; mov RDX, 8[RDI]; L1: lock; // lock always needed to make this op atomic cmpxchg16b [RDI]; jne L1; pop RBX; pop RDI; } } } else { static assert( false, "Invalid template type specified." ); } } void atomicFence() nothrow @nogc @safe { // SSE2 is always present in 64-bit x86 chips. 
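        // This is the fence the "publish" unit test near the end of the module
        // relies on: the writer fills in its payload with ordinary stores,
        // calls atomicFence(), and only then sets the flag the reader polls;
        // the reader fences once more after seeing the flag before reading the
        // payload.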
asm nothrow @nogc @trusted { naked; mfence; ret; } } } else version (GNU) { import gcc.builtins; HeadUnshared!(T) atomicOp(string op, T, V1)( ref shared T val, V1 mod ) pure nothrow @nogc @trusted if ( __traits( compiles, mixin( "*cast(T*)&val" ~ op ~ "mod" ) ) ) { // binary operators // // + - * / % ^^ & // | ^ << >> >>> ~ in // == != < <= > >= static if ( op == "+" || op == "-" || op == "*" || op == "/" || op == "%" || op == "^^" || op == "&" || op == "|" || op == "^" || op == "<<" || op == ">>" || op == ">>>" || op == "~" || // skip "in" op == "==" || op == "!=" || op == "<" || op == "<=" || op == ">" || op == ">=" ) { HeadUnshared!(T) get = atomicLoad!(MemoryOrder.raw)( val ); mixin( "return get " ~ op ~ " mod;" ); } else // assignment operators // // += -= *= /= %= ^^= &= // |= ^= <<= >>= >>>= ~= static if ( op == "+=" || op == "-=" || op == "*=" || op == "/=" || op == "%=" || op == "^^=" || op == "&=" || op == "|=" || op == "^=" || op == "<<=" || op == ">>=" || op == ">>>=" ) // skip "~=" { HeadUnshared!(T) get, set; do { get = set = atomicLoad!(MemoryOrder.raw)( val ); mixin( "set " ~ op ~ " mod;" ); } while ( !cas( &val, get, set ) ); return set; } else { static assert( false, "Operation not supported." ); } } bool cas(T,V1,V2)( shared(T)* here, const V1 ifThis, V2 writeThis ) pure nothrow @nogc @safe if ( !is(T == class) && !is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const shared(V1) ifThis, shared(V2) writeThis ) pure nothrow @nogc @safe if ( is(T == class) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } bool cas(T,V1,V2)( shared(T)* here, const shared(V1)* ifThis, shared(V2)* writeThis ) pure nothrow @nogc @safe if ( is(T U : U*) && __traits( compiles, { *here = writeThis; } ) ) { return casImpl(here, ifThis, writeThis); } private bool casImpl(T,V1,V2)( shared(T)* here, V1 ifThis, V2 writeThis ) pure nothrow @nogc @trusted { bool res = void; static if (GNU_Have_Atomics || GNU_Have_LibAtomic) { static if (T.sizeof == byte.sizeof) { res = __atomic_compare_exchange_1(here, cast(void*) &ifThis, *cast(ubyte*) &writeThis, false, MemoryOrder.seq, MemoryOrder.seq); } else static if (T.sizeof == short.sizeof) { res = __atomic_compare_exchange_2(here, cast(void*) &ifThis, *cast(ushort*) &writeThis, false, MemoryOrder.seq, MemoryOrder.seq); } else static if (T.sizeof == int.sizeof) { res = __atomic_compare_exchange_4(here, cast(void*) &ifThis, *cast(uint*) &writeThis, false, MemoryOrder.seq, MemoryOrder.seq); } else static if (T.sizeof == long.sizeof && GNU_Have_64Bit_Atomics) { res = __atomic_compare_exchange_8(here, cast(void*) &ifThis, *cast(ulong*) &writeThis, false, MemoryOrder.seq, MemoryOrder.seq); } else static if (GNU_Have_LibAtomic) { res = __atomic_compare_exchange(T.sizeof, here, cast(void*) &ifThis, cast(void*) &writeThis, MemoryOrder.seq, MemoryOrder.seq); } else static assert(0, "Invalid template type specified."); } else { static if (T.sizeof == byte.sizeof) alias U = byte; else static if (T.sizeof == short.sizeof) alias U = short; else static if (T.sizeof == int.sizeof) alias U = int; else static if (T.sizeof == long.sizeof) alias U = long; else static assert(0, "Invalid template type specified."); getAtomicMutex.lock(); scope(exit) getAtomicMutex.unlock(); if (*cast(U*)here == *cast(U*)&ifThis) { *here = writeThis; res = true; } else res = false; } return res; } // Memory model types for the __atomic* builtins. 
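    // The numeric values below match GCC's memmodel constants and are passed
    // straight through to the builtins:
    //   raw = __ATOMIC_RELAXED (0), acq = __ATOMIC_ACQUIRE (2),
    //   rel = __ATOMIC_RELEASE (3), seq = __ATOMIC_SEQ_CST (5).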
    enum MemoryOrder
    {
        raw = 0,
        acq = 2,
        rel = 3,
        seq = 5,
    }

    deprecated("Please use MemoryOrder instead.")
    alias MemoryOrder msync;

    HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted
        if (!__traits(isFloating, T))
    {
        static assert(ms != MemoryOrder.rel, "Invalid MemoryOrder for atomicLoad");
        static assert(__traits(isPOD, T), "argument to atomicLoad() must be POD");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
            {
                ubyte value = __atomic_load_1(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (T.sizeof == ushort.sizeof)
            {
                ushort value = __atomic_load_2(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (T.sizeof == uint.sizeof)
            {
                uint value = __atomic_load_4(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
            {
                ulong value = __atomic_load_8(&val, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else static if (GNU_Have_LibAtomic)
            {
                T value;
                __atomic_load(T.sizeof, &val, cast(void*)&value, ms);
                return *cast(HeadUnshared!T*) &value;
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            scope(exit) getAtomicMutex.unlock();
            return *cast(HeadUnshared!T*)&val;
        }
    }

    void atomicStore(MemoryOrder ms = MemoryOrder.seq, T, V1)( ref shared T val, V1 newval ) pure nothrow @nogc @trusted
        if ( __traits( compiles, { val = newval; } ) )
    {
        static assert(ms != MemoryOrder.acq, "Invalid MemoryOrder for atomicStore");
        static assert(__traits(isPOD, T), "argument to atomicStore() must be POD");

        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
        {
            static if (T.sizeof == ubyte.sizeof)
            {
                __atomic_store_1(&val, *cast(ubyte*) &newval, ms);
            }
            else static if (T.sizeof == ushort.sizeof)
            {
                __atomic_store_2(&val, *cast(ushort*) &newval, ms);
            }
            else static if (T.sizeof == uint.sizeof)
            {
                __atomic_store_4(&val, *cast(uint*) &newval, ms);
            }
            else static if (T.sizeof == ulong.sizeof && GNU_Have_64Bit_Atomics)
            {
                __atomic_store_8(&val, *cast(ulong*) &newval, ms);
            }
            else static if (GNU_Have_LibAtomic)
            {
                __atomic_store(T.sizeof, &val, cast(void*)&newval, ms);
            }
            else
                static assert(0, "Invalid template type specified.");
        }
        else
        {
            getAtomicMutex.lock();
            val = newval;
            getAtomicMutex.unlock();
        }
    }

    void atomicFence() nothrow @nogc
    {
        static if (GNU_Have_Atomics || GNU_Have_LibAtomic)
            __atomic_thread_fence(MemoryOrder.seq);
        else
        {
            getAtomicMutex.lock();
            getAtomicMutex.unlock();
        }
    }

    static if (!GNU_Have_Atomics && !GNU_Have_LibAtomic)
    {
        // Use system mutex for atomics, faking the purity of the functions so
        // that they can be used in pure/nothrow/@safe code.
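        // The re-declarations below bind pure/nothrow/@nogc prototypes to the
        // real C lock/unlock symbols via pragma(mangle), so the otherwise
        // impure mutex calls are accepted inside the pure atomic wrappers
        // above.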
extern (C) private pure @trusted @nogc nothrow { static if (GNU_Thread_Model == ThreadModel.Posix) { import core.sys.posix.pthread; alias atomicMutexHandle = pthread_mutex_t; pragma(mangle, "pthread_mutex_init") int fakePureMutexInit(pthread_mutex_t*, pthread_mutexattr_t*); pragma(mangle, "pthread_mutex_lock") int fakePureMutexLock(pthread_mutex_t*); pragma(mangle, "pthread_mutex_unlock") int fakePureMutexUnlock(pthread_mutex_t*); } else static if (GNU_Thread_Model == ThreadModel.Win32) { import core.sys.windows.winbase; alias atomicMutexHandle = CRITICAL_SECTION; pragma(mangle, "InitializeCriticalSection") int fakePureMutexInit(CRITICAL_SECTION*); pragma(mangle, "EnterCriticalSection") void fakePureMutexLock(CRITICAL_SECTION*); pragma(mangle, "LeaveCriticalSection") int fakePureMutexUnlock(CRITICAL_SECTION*); } else { alias atomicMutexHandle = int; } } // Implements lock/unlock operations. private struct AtomicMutex { int lock() pure @trusted @nogc nothrow { static if (GNU_Thread_Model == ThreadModel.Posix) { if (!_inited) { fakePureMutexInit(&_handle, null); _inited = true; } return fakePureMutexLock(&_handle); } else { static if (GNU_Thread_Model == ThreadModel.Win32) { if (!_inited) { fakePureMutexInit(&_handle); _inited = true; } fakePureMutexLock(&_handle); } return 0; } } int unlock() pure @trusted @nogc nothrow { static if (GNU_Thread_Model == ThreadModel.Posix) return fakePureMutexUnlock(&_handle); else { static if (GNU_Thread_Model == ThreadModel.Win32) fakePureMutexUnlock(&_handle); return 0; } } private: atomicMutexHandle _handle; bool _inited; } // Internal static mutex reference. private AtomicMutex* _getAtomicMutex() @trusted @nogc nothrow { __gshared static AtomicMutex mutex; return &mutex; } // Pure alias for _getAtomicMutex. pragma(mangle, _getAtomicMutex.mangleof) private AtomicMutex* getAtomicMutex() pure @trusted @nogc nothrow @property; } } // This is an ABI adapter that works on all architectures. It type puns // floats and doubles to ints and longs, atomically loads them, then puns // them back. This is necessary so that they get returned in floating // point instead of integer registers. 
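// For example, a shared float is loaded through the int-sized overload above
// and the 32 bits are then reinterpreted as float, so the result is returned
// in a floating-point register as the ABI expects.  80-bit reals have no
// integer counterpart and are rejected by the static assert below.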
HeadUnshared!(T) atomicLoad(MemoryOrder ms = MemoryOrder.seq, T)( ref const shared T val ) pure nothrow @nogc @trusted if (__traits(isFloating, T)) { static if (T.sizeof == int.sizeof) { static assert(is(T : float)); auto ptr = cast(const shared int*) &val; auto asInt = atomicLoad!(ms)(*ptr); return *(cast(typeof(return)*) &asInt); } else static if (T.sizeof == long.sizeof) { static assert(is(T : double)); auto ptr = cast(const shared long*) &val; auto asLong = atomicLoad!(ms)(*ptr); return *(cast(typeof(return)*) &asLong); } else { static assert(0, "Cannot atomically load 80-bit reals."); } } //////////////////////////////////////////////////////////////////////////////// // Unit Tests //////////////////////////////////////////////////////////////////////////////// version (unittest) { void testCAS(T)( T val ) pure nothrow @nogc @trusted in { assert(val !is T.init); } body { T base = cast(T)null; shared(T) atom = cast(shared(T))null; assert( base !is val, T.stringof ); assert( atom is base, T.stringof ); assert( cas( &atom, base, val ), T.stringof ); assert( atom is val, T.stringof ); assert( !cas( &atom, base, base ), T.stringof ); assert( atom is val, T.stringof ); } void testLoadStore(MemoryOrder ms = MemoryOrder.seq, T)( T val = T.init + 1 ) pure nothrow @nogc @trusted { T base = cast(T) 0; shared(T) atom = cast(T) 0; assert( base !is val ); assert( atom is base ); atomicStore!(ms)( atom, val ); base = atomicLoad!(ms)( atom ); assert( base is val, T.stringof ); assert( atom is val ); } void testType(T)( T val = T.init + 1 ) pure nothrow @nogc @safe { testCAS!(T)( val ); testLoadStore!(MemoryOrder.seq, T)( val ); testLoadStore!(MemoryOrder.raw, T)( val ); } @safe pure nothrow unittest { testType!(bool)(); testType!(byte)(); testType!(ubyte)(); testType!(short)(); testType!(ushort)(); testType!(int)(); testType!(uint)(); testType!(shared int*)(); static class Klass {} testCAS!(shared Klass)( new shared(Klass) ); testType!(float)(1.0f); static if ( has64BitCAS ) { testType!(double)(1.0); testType!(long)(); testType!(ulong)(); } shared(size_t) i; atomicOp!"+="( i, cast(size_t) 1 ); assert( i == 1 ); atomicOp!"-="( i, cast(size_t) 1 ); assert( i == 0 ); shared float f = 0; atomicOp!"+="( f, 1 ); assert( f == 1 ); static if ( has64BitCAS ) { shared double d = 0; atomicOp!"+="( d, 1 ); assert( d == 1 ); } } pure nothrow unittest { static if (has128BitCAS) { struct DoubleValue { long value1; long value2; } align(16) shared DoubleValue a; atomicStore(a, DoubleValue(1,2)); assert(a.value1 == 1 && a.value2 ==2); while (!cas(&a, DoubleValue(1,2), DoubleValue(3,4))){} assert(a.value1 == 3 && a.value2 ==4); align(16) DoubleValue b = atomicLoad(a); assert(b.value1 == 3 && b.value2 ==4); } version (D_LP64) { enum hasDWCAS = has128BitCAS; } else { enum hasDWCAS = has64BitCAS; } static if (hasDWCAS) { static struct List { size_t gen; List* next; } shared(List) head; assert(cas(&head, shared(List)(0, null), shared(List)(1, cast(List*)1))); assert(head.gen == 1); assert(cast(size_t)head.next == 1); } } pure nothrow unittest { static struct S { int val; } auto s = shared(S)(1); shared(S*) ptr; // head unshared shared(S)* ifThis = null; shared(S)* writeThis = &s; assert(ptr is null); assert(cas(&ptr, ifThis, writeThis)); assert(ptr is writeThis); // head shared shared(S*) ifThis2 = writeThis; shared(S*) writeThis2 = null; assert(cas(&ptr, ifThis2, writeThis2)); assert(ptr is null); // head unshared target doesn't want atomic CAS shared(S)* ptr2; static assert(!__traits(compiles, cas(&ptr2, ifThis, 
writeThis))); static assert(!__traits(compiles, cas(&ptr2, ifThis2, writeThis2))); } unittest { import core.thread; // Use heap memory to ensure an optimizing // compiler doesn't put things in registers. uint* x = new uint(); bool* f = new bool(); uint* r = new uint(); auto thr = new Thread(() { while (!*f) { } atomicFence(); *r = *x; }); thr.start(); *x = 42; atomicFence(); *f = true; atomicFence(); thr.join(); assert(*r == 42); } // === atomicFetchAdd and atomicFetchSub operations ==== pure nothrow @nogc @safe unittest { shared ubyte u8 = 1; shared ushort u16 = 2; shared uint u32 = 3; shared byte i8 = 5; shared short i16 = 6; shared int i32 = 7; assert(atomicOp!"+="(u8, 8) == 9); assert(atomicOp!"+="(u16, 8) == 10); assert(atomicOp!"+="(u32, 8) == 11); assert(atomicOp!"+="(i8, 8) == 13); assert(atomicOp!"+="(i16, 8) == 14); assert(atomicOp!"+="(i32, 8) == 15); version (AsmX86_64) { shared ulong u64 = 4; shared long i64 = 8; assert(atomicOp!"+="(u64, 8) == 12); assert(atomicOp!"+="(i64, 8) == 16); } } pure nothrow @nogc @safe unittest { shared ubyte u8 = 1; shared ushort u16 = 2; shared uint u32 = 3; shared byte i8 = 5; shared short i16 = 6; shared int i32 = 7; assert(atomicOp!"-="(u8, 1) == 0); assert(atomicOp!"-="(u16, 1) == 1); assert(atomicOp!"-="(u32, 1) == 2); assert(atomicOp!"-="(i8, 1) == 4); assert(atomicOp!"-="(i16, 1) == 5); assert(atomicOp!"-="(i32, 1) == 6); version (AsmX86_64) { shared ulong u64 = 4; shared long i64 = 8; assert(atomicOp!"-="(u64, 1) == 3); assert(atomicOp!"-="(i64, 1) == 7); } } pure nothrow @nogc @safe unittest // issue 16651 { shared ulong a = 2; uint b = 1; atomicOp!"-="( a, b ); assert(a == 1); shared uint c = 2; ubyte d = 1; atomicOp!"-="( c, d ); assert(c == 1); } }
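// Illustrative sketch (not part of the druntime API): a minimal test-and-set
// spin lock built only from the primitives above.  The names ExampleSpinLock,
// lock and unlock are introduced here purely for illustration.
version (unittest)
{
    private struct ExampleSpinLock
    {
        shared bool locked;

        void lock() nothrow @nogc
        {
            // Spin until the flag is atomically flipped from false to true.
            // cas() already provides full sequential consistency here.
            while (!cas(&locked, false, true))
            { }
        }

        void unlock() nothrow @nogc
        {
            // A release store is sufficient to publish the critical section.
            atomicStore!(MemoryOrder.rel)(locked, false);
        }
    }

    nothrow @nogc unittest
    {
        ExampleSpinLock l;
        l.lock();
        assert(atomicLoad(l.locked));
        l.unlock();
        assert(!atomicLoad(l.locked));
    }
}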