tbb_machine.h

/*
    Copyright 2005-2010 Intel Corporation.  All Rights Reserved.

    The source code contained or described herein and all documents related
    to the source code ("Material") are owned by Intel Corporation or its
    suppliers or licensors.  Title to the Material remains with Intel
    Corporation or its suppliers and licensors.  The Material is protected
    by worldwide copyright laws and treaty provisions.  No part of the
    Material may be used, copied, reproduced, modified, published, uploaded,
    posted, transmitted, distributed, or disclosed in any way without
    Intel's prior express written permission.

    No license under any patent, copyright, trade secret or other
    intellectual property right is granted to or conferred upon you by
    disclosure or delivery of the Materials, either expressly, by
    implication, inducement, estoppel or otherwise.  Any license under such
    intellectual property rights must be express and approved by Intel in
    writing.
*/
00020 
00021 #ifndef __TBB_machine_H
00022 #define __TBB_machine_H
00023 
00024 #include "tbb_stddef.h"
00025 
00026 #if _WIN32||_WIN64
00027 
00028 #ifdef _MANAGED
00029 #pragma managed(push, off)
00030 #endif
00031 
00032 #if __MINGW64__
00033 #include "machine/linux_intel64.h"
00034 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00035 #define __TBB_Yield()  SwitchToThread()
00036 #elif __MINGW32__
00037 #include "machine/linux_ia32.h"
00038 extern "C" __declspec(dllimport) int __stdcall SwitchToThread( void );
00039 #define __TBB_Yield()  SwitchToThread()
00040 #elif defined(_M_IX86)
00041 #include "machine/windows_ia32.h"
00042 #elif defined(_M_AMD64) 
00043 #include "machine/windows_intel64.h"
00044 #elif _XBOX 
00045 #include "machine/xbox360_ppc.h"
00046 #endif
00047 
00048 #ifdef _MANAGED
00049 #pragma managed(pop)
00050 #endif
00051 
00052 #elif __linux__ || __FreeBSD__
00053 
00054 #if __i386__
00055 #include "machine/linux_ia32.h"
00056 #elif __x86_64__
00057 #include "machine/linux_intel64.h"
00058 #elif __ia64__
00059 #include "machine/linux_ia64.h"
00060 #elif __powerpc__
00061 #include "machine/mac_ppc.h"
00062 #endif
00063 #include "machine/linux_common.h"
00064 
00065 #elif __APPLE__
00066 
00067 #if __i386__
00068 #include "machine/linux_ia32.h"
00069 #elif __x86_64__
00070 #include "machine/linux_intel64.h"
00071 #elif __POWERPC__
00072 #include "machine/mac_ppc.h"
00073 #endif
00074 #include "machine/macos_common.h"
00075 
00076 #elif _AIX
00077 
00078 #include "machine/ibm_aix51.h"
00079 
00080 #elif __sun || __SUNPRO_CC
00081 
00082 #define __asm__ asm 
00083 #define __volatile__ volatile
00084 #if __i386  || __i386__
00085 #include "machine/linux_ia32.h"
00086 #elif __x86_64__
00087 #include "machine/linux_intel64.h"
00088 #elif __sparc
00089 #include "machine/sunos_sparc.h"
00090 #endif
00091 #include <sched.h>
00092 #define __TBB_Yield() sched_yield()
00093 
00094 #endif
00095 
00097 
00109 #if    !defined(__TBB_CompareAndSwap4) \
00110     || !defined(__TBB_CompareAndSwap8) \
00111     || !defined(__TBB_Yield)           \
00112     || !defined(__TBB_full_memory_fence)    \
00113     || !defined(__TBB_release_consistency_helper)
00114 #error Minimal requirements for tbb_machine.h not satisfied; platform is not supported.
00115 #endif
00116 
00117 #ifndef __TBB_Pause
00118     inline void __TBB_Pause(int32_t) {
00119         __TBB_Yield();
00120     }
00121 #endif
00122 
namespace tbb {

//! Sequentially consistent full memory fence.
inline void atomic_fence () { __TBB_full_memory_fence(); }

namespace internal {

//! Class that implements exponential backoff.
/** See implementation of spin_wait_while_eq for an example. */
class atomic_backoff : no_copy {
    //! Time delay, in units of "pause" instructions.
    /** Should be approximately the number of "pause" instructions
        that take the same time as a context switch. */
    static const int32_t LOOPS_BEFORE_YIELD = 16;
    int32_t count;
public:
    atomic_backoff() : count(1) {}

    //! Pause for a while.
    void pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long next time.
            count*=2;
        } else {
            // Pause is so long that we might as well yield CPU to scheduler.
            __TBB_Yield();
        }
    }

    // Pause a limited number of times; once the limit is reached, return false immediately.
    bool bounded_pause() {
        if( count<=LOOPS_BEFORE_YIELD ) {
            __TBB_Pause(count);
            // Pause twice as long next time.
            count*=2;
            return true;
        } else {
            return false;
        }
    }

    void reset() {
        count = 1;
    }
};
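
//! Usage sketch: the canonical compare-and-swap retry loop built on atomic_backoff.
/** A minimal illustration (the function and variable names here are hypothetical,
    not part of this header):
    \code
    uint32_t add_slowly( volatile uint32_t& counter, uint32_t delta ) {
        atomic_backoff backoff;
        for(;;) {
            uint32_t snapshot = counter;
            if( __TBB_CompareAndSwap4( &counter, snapshot+delta, snapshot )==snapshot )
                return snapshot;
            backoff.pause(); // wait 1, 2, 4, ... pause units, then start yielding
        }
    }
    \endcode
    This is essentially the pattern that __TBB_FetchAndAddGeneric below implements. */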

//! Spin WHILE the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_while_eq( const volatile T& location, U value ) {
    atomic_backoff backoff;
    while( location==value ) backoff.pause();
}

//! Spin UNTIL the value of the variable is equal to a given value.
/** T and U should be comparable types. */
template<typename T, typename U>
void spin_wait_until_eq( const volatile T& location, const U value ) {
    atomic_backoff backoff;
    while( location!=value ) backoff.pause();
}
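
//! Usage sketch: blocking a consumer until a producer publishes a flag.
/** A minimal illustration (`ready` is a hypothetical flag written by another thread):
    \code
    void wait_until_ready( const volatile int& ready ) {
        spin_wait_until_eq( ready, 1 ); // backs off exponentially while it waits
    }
    \endcode */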

// T should be unsigned, otherwise sign propagation will break correctness of bit manipulations.
// S should be either 1 or 2, for the mask calculation to work correctly.
// Together, these rules limit applicability of Masked CAS to unsigned char and unsigned short.
template<size_t S, typename T>
inline T __TBB_MaskedCompareAndSwap (volatile T *ptr, T value, T comparand ) {
    volatile uint32_t * base = (uint32_t*)( (uintptr_t)ptr & ~(uintptr_t)0x3 );
#if __TBB_BIG_ENDIAN
    const uint8_t bitoffset = uint8_t( 8*( 4-S - (uintptr_t(ptr) & 0x3) ) );
#else
    const uint8_t bitoffset = uint8_t( 8*((uintptr_t)ptr & 0x3) );
#endif
    const uint32_t mask = ( (1<<(S*8)) - 1 )<<bitoffset;
    atomic_backoff b;
    uint32_t result;
    for(;;) {
        result = *base; // reload the base value which might change during the pause
        uint32_t old_value = ( result & ~mask ) | ( comparand << bitoffset );
        uint32_t new_value = ( result & ~mask ) | ( value << bitoffset );
        // __TBB_CompareAndSwap4 presumed to have full fence.
        result = __TBB_CompareAndSwap4( base, new_value, old_value );
        if(  result==old_value               // CAS succeeded
          || ((result^old_value)&mask)!=0 )  // CAS failed and the bits of interest have changed
            break;
        else                                 // CAS failed but the bits of interest were left unchanged
            b.pause();
    }
    return T((result & mask) >> bitoffset);
}
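
// Worked example of the masking arithmetic above: on a little-endian machine, a 2-byte
// CAS at an address two bytes past a 4-byte boundary gets bitoffset = 8*2 = 16 and
// mask = ((1<<16)-1)<<16 = 0xFFFF0000, so only the upper half of the enclosing aligned
// 4-byte word participates in the compare-and-swap.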

template<size_t S, typename T>
inline T __TBB_CompareAndSwapGeneric (volatile void *ptr, T value, T comparand ) {
    return __TBB_CompareAndSwapW((T *)ptr,value,comparand);
}

template<>
inline uint8_t __TBB_CompareAndSwapGeneric <1,uint8_t> (volatile void *ptr, uint8_t value, uint8_t comparand ) {
#ifdef __TBB_CompareAndSwap1
    return __TBB_CompareAndSwap1(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<1,uint8_t>((volatile uint8_t *)ptr,value,comparand);
#endif
}

template<>
inline uint16_t __TBB_CompareAndSwapGeneric <2,uint16_t> (volatile void *ptr, uint16_t value, uint16_t comparand ) {
#ifdef __TBB_CompareAndSwap2
    return __TBB_CompareAndSwap2(ptr,value,comparand);
#else
    return __TBB_MaskedCompareAndSwap<2,uint16_t>((volatile uint16_t *)ptr,value,comparand);
#endif
}

template<>
inline uint32_t __TBB_CompareAndSwapGeneric <4,uint32_t> (volatile void *ptr, uint32_t value, uint32_t comparand ) {
    return __TBB_CompareAndSwap4(ptr,value,comparand);
}

template<>
inline uint64_t __TBB_CompareAndSwapGeneric <8,uint64_t> (volatile void *ptr, uint64_t value, uint64_t comparand ) {
    return __TBB_CompareAndSwap8(ptr,value,comparand);
}

template<size_t S, typename T>
inline T __TBB_FetchAndAddGeneric (volatile void *ptr, T addend) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, result+addend, result )==result )
            break;
        b.pause();
    }
    return result;
}

template<size_t S, typename T>
inline T __TBB_FetchAndStoreGeneric (volatile void *ptr, T value) {
    atomic_backoff b;
    T result;
    for(;;) {
        result = *reinterpret_cast<volatile T *>(ptr);
        // __TBB_CompareAndSwapGeneric presumed to have full fence.
        if( __TBB_CompareAndSwapGeneric<S,T> ( ptr, value, result )==result )
            break;
        b.pause();
    }
    return result;
}
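
//! Usage sketch: a word-sized atomic exchange built from the generic above.
/** A minimal illustration (the function name is hypothetical):
    \code
    ptrdiff_t exchange_slot( volatile ptrdiff_t& slot, ptrdiff_t x ) {
        // Returns the previous value of slot; retries internally via CAS.
        return __TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>( &slot, x );
    }
    \endcode */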

// Macro __TBB_TypeWithAlignmentAtLeastAsStrict(T) should be a type with alignment at least as
// strict as type T.  The type should have a trivial default constructor and destructor, so that
// arrays of that type can be declared without initializers.
// It is correct (but perhaps a waste of space) if __TBB_TypeWithAlignmentAtLeastAsStrict(T) expands
// to a type bigger than T.
// The default definition here works on machines where integers are naturally aligned and the
// strictest alignment is 16.
#ifndef __TBB_TypeWithAlignmentAtLeastAsStrict

#if __GNUC__ || __SUNPRO_CC
struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
} __attribute__((aligned(16)));
#elif _MSC_VER
__declspec(align(16)) struct __TBB_machine_type_with_strictest_alignment {
    int member[4];
};
#else
#error Must define __TBB_TypeWithAlignmentAtLeastAsStrict(T) or __TBB_machine_type_with_strictest_alignment
#endif

template<size_t N> struct type_with_alignment {__TBB_machine_type_with_strictest_alignment member;};
template<> struct type_with_alignment<1> { char member; };
template<> struct type_with_alignment<2> { uint16_t member; };
template<> struct type_with_alignment<4> { uint32_t member; };
template<> struct type_with_alignment<8> { uint64_t member; };

#if _MSC_VER||defined(__GNUC__)&&__GNUC__==3 && __GNUC_MINOR__<=2
// Workaround for compilers (MSVC, and GCC 3.2 or older) that may miscompute __alignof(T)
// when T has not yet been instantiated: taking sizeof(T) as a template parameter forces
// T to be instantiated before its alignment is queried.
template<size_t Size, typename T>
struct work_around_alignment_bug {
#if _MSC_VER
    static const size_t alignment = __alignof(T);
#else
    static const size_t alignment = __alignof__(T);
#endif
};
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<tbb::internal::work_around_alignment_bug<sizeof(T),T>::alignment>
#elif __GNUC__ || __SUNPRO_CC
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) tbb::internal::type_with_alignment<__alignof__(T)>
#else
#define __TBB_TypeWithAlignmentAtLeastAsStrict(T) __TBB_machine_type_with_strictest_alignment
#endif
#endif  /* __TBB_TypeWithAlignmentAtLeastAsStrict */
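
//! Usage sketch: declaring raw storage suitably aligned for an arbitrary type.
/** A minimal illustration (the union name is hypothetical); the aligner member
    is never read and exists only to force alignment of the byte array:
    \code
    template<typename U>
    union aligned_buffer {
        __TBB_TypeWithAlignmentAtLeastAsStrict(U) aligner; // alignment only
        char bytes[sizeof(U)];                             // placement-new a U here
    };
    \endcode */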

// Template class here is to avoid instantiation of the static data for modules that don't use it
template<typename T>
struct reverse {
    static const T byte_table[256];
};
// An efficient implementation of the reverse function utilizes a 2^8 lookup table holding the bit-reversed
// values of [0..2^8 - 1]. Those values can also be computed on the fly at a slightly higher cost.
template<typename T>
const T reverse<T>::byte_table[256] = {
    0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0, 0x70, 0xF0,
    0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8, 0x38, 0xB8, 0x78, 0xF8,
    0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94, 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4,
    0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC, 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC,
    0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2, 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2,
    0x0A, 0x8A, 0x4A, 0xCA, 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA,
    0x06, 0x86, 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
    0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE, 0x7E, 0xFE,
    0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1, 0x31, 0xB1, 0x71, 0xF1,
    0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99, 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9,
    0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5, 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5,
    0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD, 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD,
    0x03, 0x83, 0x43, 0xC3, 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3,
    0x0B, 0x8B, 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
    0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7, 0x77, 0xF7,
    0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF, 0x3F, 0xBF, 0x7F, 0xFF
};

} // namespace internal
} // namespace tbb

#ifndef __TBB_CompareAndSwap1
#define __TBB_CompareAndSwap1 tbb::internal::__TBB_CompareAndSwapGeneric<1,uint8_t>
#endif

#ifndef __TBB_CompareAndSwap2
#define __TBB_CompareAndSwap2 tbb::internal::__TBB_CompareAndSwapGeneric<2,uint16_t>
#endif

#ifndef __TBB_CompareAndSwapW
#define __TBB_CompareAndSwapW tbb::internal::__TBB_CompareAndSwapGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndAdd1
#define __TBB_FetchAndAdd1 tbb::internal::__TBB_FetchAndAddGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndAdd2
#define __TBB_FetchAndAdd2 tbb::internal::__TBB_FetchAndAddGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndAdd4
#define __TBB_FetchAndAdd4 tbb::internal::__TBB_FetchAndAddGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndAdd8
#define __TBB_FetchAndAdd8 tbb::internal::__TBB_FetchAndAddGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndAddW
#define __TBB_FetchAndAddW tbb::internal::__TBB_FetchAndAddGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#ifndef __TBB_FetchAndStore1
#define __TBB_FetchAndStore1 tbb::internal::__TBB_FetchAndStoreGeneric<1,uint8_t>
#endif

#ifndef __TBB_FetchAndStore2
#define __TBB_FetchAndStore2 tbb::internal::__TBB_FetchAndStoreGeneric<2,uint16_t>
#endif

#ifndef __TBB_FetchAndStore4
#define __TBB_FetchAndStore4 tbb::internal::__TBB_FetchAndStoreGeneric<4,uint32_t>
#endif

#ifndef __TBB_FetchAndStore8
#define __TBB_FetchAndStore8 tbb::internal::__TBB_FetchAndStoreGeneric<8,uint64_t>
#endif

#ifndef __TBB_FetchAndStoreW
#define __TBB_FetchAndStoreW tbb::internal::__TBB_FetchAndStoreGeneric<sizeof(ptrdiff_t),ptrdiff_t>
#endif

#if __TBB_DECL_FENCED_ATOMICS

#ifndef __TBB_CompareAndSwap1__TBB_full_fence
#define __TBB_CompareAndSwap1__TBB_full_fence __TBB_CompareAndSwap1
#endif
#ifndef __TBB_CompareAndSwap1acquire
#define __TBB_CompareAndSwap1acquire __TBB_CompareAndSwap1__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap1release
#define __TBB_CompareAndSwap1release __TBB_CompareAndSwap1__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap2__TBB_full_fence
#define __TBB_CompareAndSwap2__TBB_full_fence __TBB_CompareAndSwap2
#endif
#ifndef __TBB_CompareAndSwap2acquire
#define __TBB_CompareAndSwap2acquire __TBB_CompareAndSwap2__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap2release
#define __TBB_CompareAndSwap2release __TBB_CompareAndSwap2__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap4__TBB_full_fence
#define __TBB_CompareAndSwap4__TBB_full_fence __TBB_CompareAndSwap4
#endif
#ifndef __TBB_CompareAndSwap4acquire
#define __TBB_CompareAndSwap4acquire __TBB_CompareAndSwap4__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap4release
#define __TBB_CompareAndSwap4release __TBB_CompareAndSwap4__TBB_full_fence
#endif

#ifndef __TBB_CompareAndSwap8__TBB_full_fence
#define __TBB_CompareAndSwap8__TBB_full_fence __TBB_CompareAndSwap8
#endif
#ifndef __TBB_CompareAndSwap8acquire
#define __TBB_CompareAndSwap8acquire __TBB_CompareAndSwap8__TBB_full_fence
#endif
#ifndef __TBB_CompareAndSwap8release
#define __TBB_CompareAndSwap8release __TBB_CompareAndSwap8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd1__TBB_full_fence
#define __TBB_FetchAndAdd1__TBB_full_fence __TBB_FetchAndAdd1
#endif
#ifndef __TBB_FetchAndAdd1acquire
#define __TBB_FetchAndAdd1acquire __TBB_FetchAndAdd1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd1release
#define __TBB_FetchAndAdd1release __TBB_FetchAndAdd1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd2__TBB_full_fence
#define __TBB_FetchAndAdd2__TBB_full_fence __TBB_FetchAndAdd2
#endif
#ifndef __TBB_FetchAndAdd2acquire
#define __TBB_FetchAndAdd2acquire __TBB_FetchAndAdd2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd2release
#define __TBB_FetchAndAdd2release __TBB_FetchAndAdd2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd4__TBB_full_fence
#define __TBB_FetchAndAdd4__TBB_full_fence __TBB_FetchAndAdd4
#endif
#ifndef __TBB_FetchAndAdd4acquire
#define __TBB_FetchAndAdd4acquire __TBB_FetchAndAdd4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd4release
#define __TBB_FetchAndAdd4release __TBB_FetchAndAdd4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndAdd8__TBB_full_fence
#define __TBB_FetchAndAdd8__TBB_full_fence __TBB_FetchAndAdd8
#endif
#ifndef __TBB_FetchAndAdd8acquire
#define __TBB_FetchAndAdd8acquire __TBB_FetchAndAdd8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndAdd8release
#define __TBB_FetchAndAdd8release __TBB_FetchAndAdd8__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore1__TBB_full_fence
#define __TBB_FetchAndStore1__TBB_full_fence __TBB_FetchAndStore1
#endif
#ifndef __TBB_FetchAndStore1acquire
#define __TBB_FetchAndStore1acquire __TBB_FetchAndStore1__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore1release
#define __TBB_FetchAndStore1release __TBB_FetchAndStore1__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore2__TBB_full_fence
#define __TBB_FetchAndStore2__TBB_full_fence __TBB_FetchAndStore2
#endif
#ifndef __TBB_FetchAndStore2acquire
#define __TBB_FetchAndStore2acquire __TBB_FetchAndStore2__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore2release
#define __TBB_FetchAndStore2release __TBB_FetchAndStore2__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore4__TBB_full_fence
#define __TBB_FetchAndStore4__TBB_full_fence __TBB_FetchAndStore4
#endif
#ifndef __TBB_FetchAndStore4acquire
#define __TBB_FetchAndStore4acquire __TBB_FetchAndStore4__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore4release
#define __TBB_FetchAndStore4release __TBB_FetchAndStore4__TBB_full_fence
#endif

#ifndef __TBB_FetchAndStore8__TBB_full_fence
#define __TBB_FetchAndStore8__TBB_full_fence __TBB_FetchAndStore8
#endif
#ifndef __TBB_FetchAndStore8acquire
#define __TBB_FetchAndStore8acquire __TBB_FetchAndStore8__TBB_full_fence
#endif
#ifndef __TBB_FetchAndStore8release
#define __TBB_FetchAndStore8release __TBB_FetchAndStore8__TBB_full_fence
#endif

#endif // __TBB_DECL_FENCED_ATOMICS

// Special atomic functions
#ifndef __TBB_FetchAndAddWrelease
#define __TBB_FetchAndAddWrelease __TBB_FetchAndAddW
#endif

#ifndef __TBB_FetchAndIncrementWacquire
#define __TBB_FetchAndIncrementWacquire(P) __TBB_FetchAndAddW(P,1)
#endif

#ifndef __TBB_FetchAndDecrementWrelease
#define __TBB_FetchAndDecrementWrelease(P) __TBB_FetchAndAddW(P,(-1))
#endif

template <typename T, size_t S>
struct __TBB_machine_load_store {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = location;
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T &location, T value) {
        __TBB_release_consistency_helper();
        location = value;
    }
};

#if __TBB_WORDSIZE==4
#if _MSC_VER
using tbb::internal::int64_t;
#endif
// On 32-bit platforms, __TBB_Store8 and __TBB_Load8 must be defined; default to CAS-based versions.
#ifndef __TBB_Store8
inline void __TBB_Store8 (volatile void *ptr, int64_t value) {
    for(;;) {
        int64_t result = *(int64_t *)ptr;
        if( __TBB_CompareAndSwap8(ptr,value,result)==result ) break;
    }
}
#endif

#ifndef __TBB_Load8
inline int64_t __TBB_Load8 (const volatile void *ptr) {
    const int64_t anyvalue = 3264; // Could be anything, just the same for comparand and new value
    return __TBB_CompareAndSwap8(const_cast<volatile void *>(ptr),anyvalue,anyvalue);
}
#endif

template <typename T>
struct __TBB_machine_load_store<T,8> {
    static inline T load_with_acquire(const volatile T& location) {
        T to_return = (T)__TBB_Load8((const volatile void*)&location);
        __TBB_release_consistency_helper();
        return to_return;
    }

    static inline void store_with_release(volatile T& location, T value) {
        __TBB_release_consistency_helper();
        __TBB_Store8((volatile void *)&location,(int64_t)value);
    }
};
#endif /* __TBB_WORDSIZE==4 */

#ifndef __TBB_load_with_acquire
template<typename T>
inline T __TBB_load_with_acquire(const volatile T &location) {
    return __TBB_machine_load_store<T,sizeof(T)>::load_with_acquire(location);
}
#endif

#ifndef __TBB_store_with_release
template<typename T, typename V>
inline void __TBB_store_with_release(volatile T& location, V value) {
    __TBB_machine_load_store<T,sizeof(T)>::store_with_release(location,T(value));
}
//! Overload that exists solely to avoid /Wp64 warnings.
inline void __TBB_store_with_release(volatile size_t& location, size_t value) {
    __TBB_machine_load_store<size_t,sizeof(size_t)>::store_with_release(location,value);
}
#endif
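
//! Usage sketch: publishing data to another thread with release/acquire semantics.
/** A minimal illustration (`payload`, `published`, `compute`, and `consume` are
    hypothetical names):
    \code
    int payload;
    volatile int published; // initially 0
    void producer() {
        payload = compute();                      // ordinary store
        __TBB_store_with_release( published, 1 ); // payload becomes visible first
    }
    void consumer() {
        if( __TBB_load_with_acquire( published ) )
            consume( payload );                   // guaranteed to see the payload
    }
    \endcode */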

#ifndef __TBB_Log2
inline intptr_t __TBB_Log2( uintptr_t x ) {
    if( x==0 ) return -1;
    intptr_t result = 0;
    uintptr_t tmp;
#if __TBB_WORDSIZE>=8
    if( (tmp = x>>32) ) { x=tmp; result += 32; }
#endif
    if( (tmp = x>>16) ) { x=tmp; result += 16; }
    if( (tmp = x>>8) )  { x=tmp; result += 8; }
    if( (tmp = x>>4) )  { x=tmp; result += 4; }
    if( (tmp = x>>2) )  { x=tmp; result += 2; }
    return (x&2)? result+1: result;
}
#endif
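
// Worked example: __TBB_Log2 returns the index of the most significant set bit,
// i.e. floor(log2(x)). For x = 40 (binary 101000), the x>>4 step fires (x becomes 2,
// result becomes 4), then the final x&2 test adds 1, giving 5. By convention,
// __TBB_Log2(0) returns -1.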

#ifndef __TBB_AtomicOR
inline void __TBB_AtomicOR( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp|addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif

#ifndef __TBB_AtomicAND
inline void __TBB_AtomicAND( volatile void *operand, uintptr_t addend ) {
    tbb::internal::atomic_backoff b;
    for(;;) {
        uintptr_t tmp = *(volatile uintptr_t *)operand;
        uintptr_t result = __TBB_CompareAndSwapW(operand, tmp&addend, tmp);
        if( result==tmp ) break;
        b.pause();
    }
}
#endif
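
//! Usage sketch: atomically setting and clearing one bit in a word of flags.
/** A minimal illustration (the function name is hypothetical); note that for
    __TBB_AtomicAND the second argument acts as a mask of bits to keep:
    \code
    void pulse_bit2( volatile uintptr_t& flags ) {
        __TBB_AtomicOR ( &flags, uintptr_t(0x4) );  // set bit 2
        __TBB_AtomicAND( &flags, ~uintptr_t(0x4) ); // clear bit 2
    }
    \endcode */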

#ifndef __TBB_TryLockByte
inline bool __TBB_TryLockByte( unsigned char &flag ) {
    return __TBB_CompareAndSwap1(&flag,1,0)==0;
}
#endif

#ifndef __TBB_LockByte
inline uintptr_t __TBB_LockByte( unsigned char& flag ) {
    if ( !__TBB_TryLockByte(flag) ) {
        tbb::internal::atomic_backoff b;
        do {
            b.pause();
        } while ( !__TBB_TryLockByte(flag) );
    }
    return 0;
}
#endif
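
//! Usage sketch: a minimal byte spin lock built from the primitives above.
/** A minimal illustration (the function names are hypothetical); one way to release
    the lock is a release store of zero:
    \code
    void lock  ( unsigned char& flag )          { __TBB_LockByte( flag ); }
    void unlock( volatile unsigned char& flag ) {
        __TBB_store_with_release( flag, (unsigned char)0 );
    }
    \endcode */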

#ifndef __TBB_ReverseByte
inline unsigned char __TBB_ReverseByte(unsigned char src) {
    return tbb::internal::reverse<unsigned char>::byte_table[src];
}
#endif

template<typename T>
T __TBB_ReverseBits(T src)
{
    T dst;
    unsigned char *original = (unsigned char *) &src;
    unsigned char *reversed = (unsigned char *) &dst;

    for( int i = sizeof(T)-1; i >= 0; i-- )
        reversed[i] = __TBB_ReverseByte( original[sizeof(T)-i-1] );

    return dst;
}
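
// Worked example: __TBB_ReverseBits reverses the bit order of a whole integer by
// bit-reversing each byte through the lookup table and swapping the bytes end to end.
// For a 16-bit value, __TBB_ReverseBits<uint16_t>(0x0001) == 0x8000, regardless of
// the machine's endianness.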

#endif /* __TBB_machine_H */
