blob: 2386dd6bffa07eae4661f7a2164c4d66fef198bb [file] [log] [blame]
/* cilk_fake.h -*-C++-*-
*
*************************************************************************
*
* @copyright
* Copyright (C) 2011-2013, Intel Corporation
* All rights reserved.
*
* @copyright
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* @copyright
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
* BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
* OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
* AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
**************************************************************************/
/**
* @file cilk_fake.h
*
* @brief Macros to simulate a compiled Cilk program.
*
* Used carefully, these macros can be used to create a Cilk program with a
* non-Cilk compiler by manually inserting the code necessary for interacting
* with the Cilk runtime library. They are not intended to be pretty (you
* wouldn't want to write a whole program using these macros), but they are
* useful for experiments. They also work well as an illustration of what the
* compiler generates.
*
* Details of the mechanisms used in these macros are described in
* design-notes/CilkPlusABI.docx
*
* Example 1: fib in C++
* ---------------------
*
* #include <internal/cilk_fake.h>
*
* int fib(int n)
* {
* CILK_FAKE_PROLOG();
*
* if (n < 2)
* return n;
*
* int a, b;
* CILK_FAKE_SPAWN_R(a, fib(n - 1));
* b = fib(n - 2);
* CILK_FAKE_SYNC();
*
* return a + b;
* }
*
*
* Example 2: fib in C
* -------------------
*
* #include <internal/cilk_fake.h>
*
* int fib(int n);
*
* void fib_spawn_helper(__cilkrts_stack_frame* parent_sf, int* a, int n)
* {
* CILK_FAKE_SPAWN_HELPER_PROLOG(*parent_sf);
* __cilk_fake_detach(&__cilk_sf);
* *a = fib(n - 1);
* CILK_FAKE_SPAWN_HELPER_EPILOG();
* }
*
* int fib(int n)
* {
* CILK_FAKE_PROLOG();
*
* if (n < 2)
* return n;
*
* int a, b;
* CILK_FAKE_CALL_SPAWN_HELPER(fib_spawn_helper(&__cilk_sf, &a, n));
* b = fib(n - 2);
* CILK_FAKE_SYNC();
*
* CILK_FAKE_EPILOG();
* return a + b;
* }
*/
#ifndef INCLUDED_CILK_FAKE_DOT_H
#define INCLUDED_CILK_FAKE_DOT_H
// This header implements ABI version 1. If __CILKRTS_ABI_VERSION is already
// defined but is less than 1, the data structures in <internal/abi.h> will
// not match the expectations of the facilities in this header. For
// successful compilation, __CILKRTS_ABI_VERSION must therefore either be
// undefined (it then defaults to 1 here) or be defined as 1 or greater.
#ifndef __CILKRTS_ABI_VERSION
    // ABI version was not specified. Default it to 1.
#   define __CILKRTS_ABI_VERSION 1
#elif __CILKRTS_ABI_VERSION < 1
    // ABI version was specified but is too old. Fail compilation.
#   error cilk_fake.h requires an ABI version of 1 or greater
#endif
#include <internal/abi.h>
// alloca is defined in malloc.h on Windows, alloca.h on Linux
#ifndef _MSC_VER
#include <alloca.h>
#else
#include <malloc.h>
// Define offsetof
#include <stddef.h>
#endif
// Initial value of a frame's flags word: the ABI version shifted into bits 24
// and up. Deriving it from __CILKRTS_ABI_VERSION allows use of a different
// version than the one defined in abi.h.
#define CILK_FAKE_VERSION_FLAG ((__CILKRTS_ABI_VERSION) << 24)
/* Initialize a frame and make it the worker's current frame. To be called
 * when the worker is already known.
 *
 *   sf  frame to initialize
 *   w   worker that the frame is attached to
 */
__CILKRTS_INLINE void __cilk_fake_enter_frame_fast(__cilkrts_stack_frame *sf,
                                                   __cilkrts_worker *w)
{
    /* A fresh frame carries only the ABI version in its flags. */
    sf->flags = CILK_FAKE_VERSION_FLAG;
    sf->worker = w;

    /* Link the frame in front of the worker's current frame.  The old
     * current frame must be read before it is replaced. */
    sf->call_parent = w->current_stack_frame;
    w->current_stack_frame = sf;
}
/* Initialize a frame. To be called when the worker is not known.
 *
 * The worker is looked up with __cilkrts_get_tls_worker(). If that yields no
 * worker, one is obtained from __cilkrts_bind_thread_1() and the frame is
 * additionally flagged CILK_FRAME_LAST.
 *
 *   sf  frame to initialize
 */
__CILKRTS_INLINE void __cilk_fake_enter_frame(__cilkrts_stack_frame *sf)
{
    __cilkrts_worker *w = __cilkrts_get_tls_worker();

    if (w) {
        /* Common case: a worker is already available. */
        __cilk_fake_enter_frame_fast(sf, w);
        return;
    }

    /* No worker yet: bind one, then mark this frame as the last one. */
    w = __cilkrts_bind_thread_1();
    __cilk_fake_enter_frame_fast(sf, w);
    sf->flags |= CILK_FRAME_LAST;
}
/* Minimal frame setup, to be called within the spawn helper.
 *
 * Only records the parent frame and clears the worker pointer; the rest of
 * the initialization happens in __cilk_fake_detach(), which reads the saved
 * call_parent. A null worker is what the spawn-helper cleanup code tests to
 * decide that there is nothing to undo.
 *
 *   sf         the spawn helper's own frame
 *   parent_sf  frame of the spawning function
 */
__CILKRTS_INLINE void __cilk_fake_helper_enter_frame(
    __cilkrts_stack_frame *sf,
    __cilkrts_stack_frame *parent_sf)
{
    sf->call_parent = parent_sf;
    sf->worker = 0;
}
/* Called from the spawn helper to push the parent continuation on the task
 * deque so that it can be stolen.
 *
 *   sf  the spawn helper's frame. Its call_parent must already point at the
 *       spawning function's frame (see __cilk_fake_helper_enter_frame), and
 *       that frame must already have a worker, because the worker used here
 *       is read from parent->worker.
 *
 * On return the helper frame is fully initialized, linked as the worker's
 * current frame, and flagged CILK_FRAME_DETACHED.
 */
__CILKRTS_INLINE void __cilk_fake_detach(__cilkrts_stack_frame *sf)
{
/* Initialize spawn helper frame.
 * call_parent was saved in __cilk_fake_helper_enter_frame. It is read here,
 * before __cilk_fake_enter_frame_fast() overwrites sf->call_parent with the
 * worker's current frame. */
__cilkrts_stack_frame *parent = sf->call_parent;
__cilkrts_worker *w = parent->worker;
__cilk_fake_enter_frame_fast(sf, w);
/* Append a node to the pedigree: the worker's current pedigree node is
 * copied into both the helper frame and the parent frame, then the worker
 * starts a new node (rank 0) whose parent is the copy held by the helper. */
sf->spawn_helper_pedigree = w->pedigree;
parent->parent_pedigree = w->pedigree;
w->pedigree.rank = 0;
w->pedigree.parent = &sf->spawn_helper_pedigree;
/* Push parent onto the task deque. The value pushed is sf->call_parent as
 * set by __cilk_fake_enter_frame_fast() above, i.e. the frame that was the
 * worker's current frame when the helper frame was linked in. */
__cilkrts_stack_frame *volatile *tail = w->tail;
*tail++ = sf->call_parent;
/* The stores must be separated by a store fence (noop on x86)
 * or the second store is a release (st8.rel on Itanium) */
w->tail = tail;
/* Record in the helper frame that the detach has happened. */
sf->flags |= CILK_FRAME_DETACHED;
}
/* Used only by CILK_FAKE_FORCE_FRAME_PTR(), below. Nothing in this header
 * changes it; its value (8) has bit 0 clear. */
static int __cilk_fake_dummy = 8;

/* Force the compiler into generating a frame pointer.
 *
 * The macro contains a call to alloca() guarded by a test of bit 0 of
 * __cilk_fake_dummy. Because that bit is never set, the alloca() is never
 * executed, but the 'if' statement and the non-constant variable are needed
 * so that the compiler does not optimize the call away.
 *
 *   sf  a frame object (not a pointer); only its 'worker' member is named
 */
#define CILK_FAKE_FORCE_FRAME_PTR(sf) do {                                  \
    if (__builtin_expect(__cilk_fake_dummy & 1, 0)) {                       \
        (sf).worker = (__cilkrts_worker*) alloca(__cilk_fake_dummy);        \
    }                                                                       \
} while (0)
#ifdef CILK_FAKE_NO_SHRINKWRAP
    /* "shrink-wrap" optimization disabled. Initialize the frame immediately
     * on entry; nothing is left to do at the first spawn.
     */
#   define CILK_FAKE_INITIAL_ENTER_FRAME(sf)                            \
        __cilk_fake_enter_frame(&(sf))
#   define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) ((void) &(sf))
#else
    /* "shrink-wrap" optimization enabled (the default). On entry only the
     * worker pointer is cleared; full initialization is deferred until the
     * first spawn, where a still-null worker pointer triggers it.
     */
#   define CILK_FAKE_INITIAL_ENTER_FRAME(sf) ((void) ((sf).worker = 0))
#   define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) do {                      \
        if (! (sf).worker) __cilk_fake_enter_frame(&(sf));              \
    } while (0)
#endif
/* Prologue of a spawning function.
 *
 * Declares the stack frame as a local variable named __cilk_sf, forces a
 * frame pointer, and performs the initial (possibly deferred) frame entry.
 * Because it expands to a declaration followed by statements, it must appear
 * at block scope and cannot be used as a single statement.
 */
#define CILK_FAKE_PROLOG()                                              \
    __cilk_fake_stack_frame __cilk_sf;                                  \
    CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf);                               \
    CILK_FAKE_INITIAL_ENTER_FRAME(__cilk_sf)
/* Prologue of a spawning function whose current worker is already known.
 *
 * Like CILK_FAKE_PROLOG(), but the frame is initialized right away with
 * __cilk_fake_enter_frame_fast(), so no TLS lookup of the worker is done.
 *
 *   w  the current worker (__cilkrts_worker*)
 */
#define CILK_FAKE_PROLOG_FAST(w)                                        \
    __cilk_fake_stack_frame __cilk_sf;                                  \
    CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf);                               \
    __cilk_fake_enter_frame_fast(&__cilk_sf, (w))
/* Simulate a cilk_sync on the enclosing function's frame (__cilk_sf).
 *
 * With the "shrink-wrap" optimization the frame is not initialized until the
 * first spawn: only its worker pointer is cleared on entry. If control
 * reaches the sync before any spawn (e.g. the spawn was conditional), the
 * flags are still uninitialized and the worker pointer is null, so there is
 * nothing to sync and CILK_FAKE_SYNC_IMP must not be run. The same test is
 * made by CILK_FAKE_CLEANUP_FRAME before it syncs.
 */
#define CILK_FAKE_SYNC() do {                                           \
    if (__cilk_sf.worker) {                                             \
        CILK_FAKE_SYNC_IMP(__cilk_sf);                                  \
    }                                                                   \
} while (0)
/* Epilog at the end of a spawning function. Does a sync and calls the
 * runtime for leaving the frame.
 *
 * In C the cleanup has to be spelled out, so the macro expands to
 * CILK_FAKE_CLEANUP_FRAME. In C++ the macro only names __cilk_sf, because the
 * cleanup is run by the __cilk_fake_stack_frame destructor.
 */
#ifndef __cplusplus
#   define CILK_FAKE_EPILOG() CILK_FAKE_CLEANUP_FRAME(__cilk_sf)
#else
    // Epilogue is run automatically by __cilk_fake_stack_frame destructor.
#   define CILK_FAKE_EPILOG() ((void) __cilk_sf)
#endif // __cplusplus
/* Implementation of spawning function epilog. See CILK_FAKE_EPILOG macro and
 * __cilk_fake_stack_frame destructor body.
 *
 *   sf  a frame object (not a pointer)
 *
 * A frame whose worker pointer is null was never entered, and is left alone.
 * Otherwise the frame is synched and popped off the call chain. The runtime's
 * __cilkrts_leave_frame() is called only when the flags differ from
 * CILK_FAKE_VERSION_FLAG, the value they are given when a frame is entered.
 */
#define CILK_FAKE_CLEANUP_FRAME(sf) do {                                \
    if ((sf).worker) {                                                  \
        CILK_FAKE_SYNC_IMP(sf);                                         \
        CILK_FAKE_POP_FRAME(sf);                                        \
        if ((sf).flags != CILK_FAKE_VERSION_FLAG) {                     \
            __cilkrts_leave_frame(&(sf));                               \
        }                                                               \
    }                                                                   \
} while (0)
/* Implementation of CILK_FAKE_SYNC with sf argument.
 *
 *   sf  a frame object (not a pointer). Its worker pointer is dereferenced
 *       unconditionally, so the frame must already have been entered.
 *
 * If the frame is flagged CILK_FRAME_UNSYNCHED (the unlikely case), the
 * worker's current pedigree is saved in the frame, the floating-point control
 * state is saved, and __cilkrts_sync() is called when CILK_SETJMP returns
 * zero. A non-zero return from CILK_SETJMP skips the runtime call;
 * presumably that is the runtime resuming this frame once the sync has
 * completed -- confirm against the runtime.
 *
 * Whether or not the runtime was involved, the rank of the worker's pedigree
 * is incremented afterwards.
 */
#define CILK_FAKE_SYNC_IMP(sf) do { \
if (__builtin_expect((sf).flags & CILK_FRAME_UNSYNCHED, 0)) { \
(sf).parent_pedigree = (sf).worker->pedigree; \
CILK_FAKE_SAVE_FP(sf); \
if (! CILK_SETJMP((sf).ctx)) \
__cilkrts_sync(&(sf)); \
} \
++(sf).worker->pedigree.rank; \
} while (0)
/* Save the floating-point control registers into the frame.
 *
 *   sf  a frame object (not a pointer); its mxcsr and fpcsr members receive
 *       the values stored by the stmxcsr and fnstcw instructions.
 *
 * The definition of CILK_FAKE_SAVE_FP is compiler specific (and
 * architecture specific on Windows).
 */
#ifdef _MSC_VER
#   define MXCSR_OFFSET offsetof(struct __cilkrts_stack_frame, mxcsr)
#   define FPCSR_OFFSET offsetof(struct __cilkrts_stack_frame, fpcsr)
#   if defined(_M_IX86)
        /* Windows x86 */
        /* NOTE(review): 'mov eax, sf' receives the macro argument, which the
         * callers in this header pass as a frame object rather than a
         * pointer -- confirm that this addresses the frame as intended. */
#       define CILK_FAKE_SAVE_FP(sf) do {                               \
            __asm                                                       \
            {                                                           \
                mov eax, sf                                             \
                stmxcsr [eax+MXCSR_OFFSET]                              \
                fnstcw [eax+FPCSR_OFFSET]                               \
            }                                                           \
        } while (0)
#   elif defined(_M_X64)
        /* Windows Intel64 - Not needed - saved by setjmp call */
#       define CILK_FAKE_SAVE_FP(sf) ((void) sf)
#   else
#       error "Unknown architecture"
#   endif /* Microsoft architecture specifics */
#else
    /* Non-Windows */
    /* Both instructions write to their memory operand, so the operands are
     * declared as outputs ("=m"); the compiler is entitled to assume that an
     * input-only operand is left unmodified. 'volatile' keeps the statement
     * from being removed should the stored values look unused. */
#   define CILK_FAKE_SAVE_FP(sf) do {                                   \
        __asm__ __volatile__ ( "stmxcsr %0\n\t"                         \
                               "fnstcw %1"                              \
                               : "=m" ((sf).mxcsr), "=m" ((sf).fpcsr)); \
    } while (0)
#endif
/* Call the spawn helper as part of a fake spawn.
 *
 *   helper  the complete call expression for the spawn helper, e.g.
 *           fib_spawn_helper(&__cilk_sf, &a, n)
 *
 * Operates on the enclosing function's frame, __cilk_sf. The frame is first
 * entered if its initialization was deferred, then the floating-point control
 * state is saved and CILK_SETJMP records the continuation in the frame. The
 * helper runs when CILK_SETJMP returns zero (the expected case). On a
 * non-zero return the helper is skipped; presumably that is the continuation
 * being resumed by the runtime -- confirm against the runtime.
 */
#define CILK_FAKE_CALL_SPAWN_HELPER(helper) do { \
CILK_FAKE_DEFERRED_ENTER_FRAME(__cilk_sf); \
CILK_FAKE_SAVE_FP(__cilk_sf); \
if (__builtin_expect(! CILK_SETJMP(__cilk_sf.ctx), 1)) { \
helper; \
} \
} while (0)
/* Body of a spawn helper function.
 *
 *   parent_sf  frame object of the spawning function (not a pointer)
 *   expr       the expression to spawn; evaluated after the detach
 *   ...        any number of statements to be executed before detaching
 *
 * Expands to: helper prolog, the pre-detach statements, the detach, the
 * spawned expression, and the helper epilog, in that order.
 */
#define CILK_FAKE_SPAWN_HELPER_BODY(parent_sf, expr, ...)               \
    CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf);                           \
    __VA_ARGS__;                                                        \
    __cilk_fake_detach(&__cilk_sf);                                     \
    expr;                                                               \
    CILK_FAKE_SPAWN_HELPER_EPILOG()
/* Prolog for a spawn helper function.
 *
 * Declares the helper's own frame as a local named __cilk_sf and records
 * parent_sf (a frame object, not a pointer) as its call parent. Expands to a
 * declaration followed by a statement, so it must appear at block scope.
 */
#define CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf)                        \
    __cilk_fake_spawn_helper_stack_frame __cilk_sf;                     \
    __cilk_fake_helper_enter_frame(&__cilk_sf, &(parent_sf))
/* Implementation of spawn helper epilog. See CILK_FAKE_SPAWN_HELPER_EPILOG
 * and the __cilk_fake_spawn_helper_stack_frame destructor.
 *
 *   sf  a frame object (not a pointer)
 *
 * A helper frame whose worker pointer is still null is left alone. Otherwise
 * the frame is popped off the call chain and __cilkrts_leave_frame() is
 * always called.
 */
#define CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(sf) do {                   \
    if ((sf).worker) {                                                  \
        CILK_FAKE_POP_FRAME(sf);                                        \
        __cilkrts_leave_frame(&(sf));                                   \
    }                                                                   \
} while (0)
/* Epilog to execute at the end of a spawn helper.
 *
 * In C it expands to the explicit frame cleanup. In C++ it only names
 * __cilk_sf, because the cleanup is run by the
 * __cilk_fake_spawn_helper_stack_frame destructor.
 */
#ifndef __cplusplus
#   define CILK_FAKE_SPAWN_HELPER_EPILOG()                              \
        CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(__cilk_sf)
#else
    // Epilog handled by __cilk_fake_spawn_helper_stack_frame destructor
#   define CILK_FAKE_SPAWN_HELPER_EPILOG() ((void) __cilk_sf)
#endif
/* Pop the current frame off of the call chain.
 *
 *   sf  a frame object (not a pointer) with a non-null worker
 *
 * The frame's call parent becomes the worker's current frame again, and the
 * frame's own call_parent link is cleared.
 */
#define CILK_FAKE_POP_FRAME(sf) do {                                    \
    (sf).worker->current_stack_frame = (sf).call_parent;                \
    (sf).call_parent = 0;                                               \
} while (0)
/* CILK_FAKE_EXCEPT_BEGIN / CILK_FAKE_EXCEPT_END bracket a region of a
 * spawning function. The two macros open and close a block, so they must
 * always be used as a pair.
 */
#ifdef _WIN32
    /* On Windows the region is guarded by CILK_SETJMP on the frame's
     * except_ctx. When control comes back through that context, the frame is
     * asserted to be flagged CILK_FRAME_EXCEPTING and not
     * CILK_FRAME_UNSYNCHED, and __cilkrts_rethrow() is called. The exit(0)
     * after it is reached only if __cilkrts_rethrow() returns.
     * NOTE(review): the expansion uses assert() and exit(); this header does
     * not itself include <assert.h> or <stdlib.h> -- confirm that they are
     * provided by <internal/abi.h> or by the including file. */
#   define CILK_FAKE_EXCEPT_BEGIN                                       \
        if (0 == CILK_SETJMP(__cilk_sf.except_ctx)) {
#   define CILK_FAKE_EXCEPT_END                                         \
        } else {                                                        \
            assert((__cilk_sf.flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING))\
                   == CILK_FRAME_EXCEPTING);                            \
            __cilkrts_rethrow(&__cilk_sf);                              \
            exit(0);                                                    \
        }
#else
    /* Elsewhere the macros merely delimit a plain block. They are defined
     * under the same CILK_FAKE_ names as on Windows so that code written
     * against those names builds on every platform. */
#   define CILK_FAKE_EXCEPT_BEGIN {
#   define CILK_FAKE_EXCEPT_END }
    /* Earlier spellings without the FAKE_ prefix, kept for existing users. */
#   define CILK_EXCEPT_BEGIN CILK_FAKE_EXCEPT_BEGIN
#   define CILK_EXCEPT_END CILK_FAKE_EXCEPT_END
#endif
#ifdef __cplusplus
// The following definitions depend on C++ features.
// Wrap a functor (probably a lambda), so that a call to it cannot be
// inlined.
//
// The wrapper holds a reference to the functor rather than a copy, so the
// functor must stay alive for as long as the wrapper is used. The call
// operator is declared with the compiler's no-inline annotation and defined
// out of line; it forwards the frame pointer to the wrapped functor.
template <typename F>
class __cilk_fake_noinline_wrapper
{
public:
    __cilk_fake_noinline_wrapper(F&& fn)
        : m_callable(static_cast<F&&>(fn))
    {
    }

#ifdef _WIN32
    __declspec(noinline) void operator()(__cilkrts_stack_frame *sf);
#else
    void operator()(__cilkrts_stack_frame *sf) __attribute__((noinline));
#endif

private:
    F&& m_callable;     // the wrapped functor (not owned)
};

template <typename F>
void __cilk_fake_noinline_wrapper<F>::operator()(__cilkrts_stack_frame *sf)
{
    m_callable(sf);
}
// Build a __cilk_fake_noinline_wrapper around fn, deducing the functor type
// so that callers (which typically pass a lambda) need not name it.
template <typename F>
inline
__cilk_fake_noinline_wrapper<F> __cilk_fake_make_noinline_wrapper(F&& fn)
{
    typedef __cilk_fake_noinline_wrapper<F> wrapper_type;
    return wrapper_type(static_cast<F&&>(fn));
}
// Simulate "_Cilk_spawn expr", where expr must be a function call.
//
// Note: this macro does not correctly construct function arguments.
// According to the ABI specification, function arguments should be evaluated
// before the detach and destroyed after the detach. This macro both
// evaluates and destroys them after the detach. This means that if any part
// of the function argument expression depends on a value that is modified in
// the continuation of the spawn, a race will occur between the continuation
// and the argument evaluation.
//
// To work around this problem, this macro accepts an arbitrary list of
// declarations and statements (separated by semicolons) that are evaluated
// before the detach. Thus, to simulate:
//
//     _Cilk_spawn f(expr);
//
// one would write:
//
//     CILK_FAKE_SPAWN(f(arg), auto arg = expr);
//
// Despite appearing in the reverse order, the 'arg' variable is created and
// initialized before the detach and the call to f(arg) occurs after the
// detach.
//
// The expansion builds a spawn helper from expr and the extra statements and
// calls it, through CILK_FAKE_CALL_SPAWN_HELPER, with the address of the
// enclosing function's frame (__cilk_sf).
#define CILK_FAKE_SPAWN(expr, ...)                                      \
    CILK_FAKE_CALL_SPAWN_HELPER(                                        \
        CILK_FAKE_SPAWN_HELPER(expr, __VA_ARGS__)(&__cilk_sf))
// Simulate "ret = cilk_spawn expr". The assignment to ret is made part of the
// spawned expression, so it happens after the detach. See CILK_FAKE_SPAWN for
// constraints and for the meaning of the trailing arguments.
#define CILK_FAKE_SPAWN_R(ret, expr, ...)                               \
    CILK_FAKE_SPAWN(((ret) = (expr)), __VA_ARGS__)
// Create a spawn helper as a C++11 lambda function. In addition to the
// expression to spawn, this macro takes any number of statements to be
// executed before detaching.
//
// The lambda captures by reference, takes a pointer to the parent frame, and
// is wrapped so that the call to it cannot be inlined.
#define CILK_FAKE_SPAWN_HELPER(expr, ...)                               \
    __cilk_fake_make_noinline_wrapper(                                  \
        [&](__cilkrts_stack_frame *parent_sf) {                         \
            CILK_FAKE_SPAWN_HELPER_BODY(*parent_sf, expr, __VA_ARGS__); \
        })
// C++ version of a __cilkrts_stack_frame for a spawning function.
// It adds no data to __cilkrts_stack_frame; the only difference is that the
// destructor automatically does frame cleanup, which is why
// CILK_FAKE_EPILOG() has nothing to do in C++.
struct __cilk_fake_stack_frame : __cilkrts_stack_frame
{
    // Leaves every member uninitialized; the prolog macros set up the frame.
    __cilk_fake_stack_frame()
    {
    }

    // Sync, pop and leave the frame (a no-op if the frame was never entered).
    __forceinline ~__cilk_fake_stack_frame()
    {
        CILK_FAKE_CLEANUP_FRAME(*this);
    }
};
// C++ version of a __cilkrts_stack_frame for a spawn helper.
// This struct is identical to __cilkrts_stack_frame except that the
// destructor automatically does frame cleanup.
struct __cilk_fake_spawn_helper_stack_frame : __cilkrts_stack_frame
{
// Extension of __cilkrts_stack_frame with constructor and destructor
__cilk_fake_spawn_helper_stack_frame() { worker = 0; }
__forceinline ~__cilk_fake_spawn_helper_stack_frame() {
CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(*this);
}
};
#else
// For C, __cilk_fake_stack_frame and __cilk_fake_spawn_helper_stack_frame are
// plain aliases of __cilkrts_stack_frame. There is no destructor to rely on,
// so frame cleanup must be performed explicitly (in CILK_FAKE_EPILOG and
// CILK_FAKE_SPAWN_HELPER_EPILOG).
typedef __cilkrts_stack_frame __cilk_fake_stack_frame,
                              __cilk_fake_spawn_helper_stack_frame;
#endif
#endif // ! defined(INCLUDED_CILK_FAKE_DOT_H)