diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..e43b0f9
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+.DS_Store
diff --git a/minicoro.h b/minicoro.h
index 6ad183a..4ec9604 100644
--- a/minicoro.h
+++ b/minicoro.h
@@ -631,6 +631,9 @@ typedef struct _mco_ctxbuf {
+/*
+ * Entry trampoline: copies r13 into rdi, then jumps to the address held in r12.
+ * On Apple targets rsp is lowered by 8 bytes before the jump.
+ * NOTE(review): the 8-byte adjustment is presumably there to restore the stack
+ * alignment expected at function entry -- confirm against _mco_makectx.
+ */
static void _mco_wrap_main(void) {
__asm__ __volatile__ (
"movq %r13, %rdi\n\t"
+#if defined(__APPLE__)
+ "sub $8, %rsp\n\t"
+#endif
"jmpq *%r12");
}
@@ -658,7 +661,6 @@ static MCO_FORCE_INLINE void _mco_switch(_mco_ctxbuf* from, _mco_ctxbuf* to) {
: "rax", "rcx", "rdx", "r8", "r9", "r10", "r11", "memory", "cc");
}
-
static mco_result _mco_makectx(mco_coro* co, _mco_ctxbuf* ctx, void* stack_base, size_t stack_size) {
stack_size = stack_size - 128; /* Reserve 128 bytes for the Red Zone space (System V AMD64 ABI). */
void** stack_high_ptr = (void**)((size_t)stack_base + stack_size - sizeof(size_t));
diff --git a/tests/mt-example.c b/tests/mt-example.c
index 2dae01d..2a6bf49 100644
--- a/tests/mt-example.c
+++ b/tests/mt-example.c
@@ -1,7 +1,9 @@
#define MINICORO_IMPL
#include "minicoro.h"
-#define C89THREAD_IMPLEMENTATION
-#include "thirdparty/c89thread.h"
+
+#define CUTE_SYNC_IMPLEMENTATION
+#define CUTE_SYNC_POSIX
+#include "thirdparty/cute_sync.h"
#include
#define NUM_THREADS 4
@@ -9,8 +11,8 @@
#define NUM_ITERATIONS 500
#define EXPECTED_RESULT 2396583362
-static c89mtx_t mutex;
-static c89thrd_t threads[NUM_THREADS];
+static cute_mutex_t mutex;
+static cute_thread_t* threads[NUM_THREADS];
static mco_coro* tasks[NUM_TASKS];
static void fail_mco(const char* message, mco_result res) {
@@ -61,7 +63,7 @@ int thread_worker(void* data) {
mco_coro* task = NULL;
int task_id = 0;
- if(c89mtx_lock(&mutex) != c89thrd_success)
+ if(!cute_lock(&mutex))
fail("Unable to lock mutex");
for(int i=0;i
+#include
// Coroutine entry function.
void coro_entry(mco_coro* co) {
diff --git a/tests/thirdparty/cute_sync.h b/tests/thirdparty/cute_sync.h
new file mode 100644
index 0000000..46a6682
--- /dev/null
+++ b/tests/thirdparty/cute_sync.h
@@ -0,0 +1,1331 @@
+/*
+ ------------------------------------------------------------------------------
+ Licensing information can be found at the end of the file.
+ ------------------------------------------------------------------------------
+
+ cute_sync.h - v1.01
+
+ To create implementation (the function definitions)
+ #define CUTE_SYNC_IMPLEMENTATION
+ #define CUTE_SYNC_WINDOWS
+ in *one* C/CPP file (translation unit) that includes this file
+
+ SUMMARY
+
+ Collection of practical synchronization primitives for Windows/Posix/SDL2.
+
+ Here is a list of all supported primitives.
+
+ * atomic integer/pointer
+ * thread
+ * mutex
+ * condition variable
+ * semaphore
+ * read/write lock
+ * thread pool
+
+ Here are some slides I wrote for those interested in learning prerequisite
+ knowledge for utilizing this header:
+ http://www.randygaul.net/2014/09/24/multi-threading-best-practices-for-gamedev/
+
+ A good chunk of this code came from Mattias Gustavsson's thread.h header.
+ It really is quite a good header, and worth considering!
+ https://github.com/mattiasgustavsson/libs
+
+
+ PLATFORMS
+
+ The current supported platforms are Windows/Posix/SDL. Here are the macros for
+ picking each implementation.
+
+ * CUTE_SYNC_WINDOWS
+ * CUTE_SYNC_POSIX
+ * CUTE_SYNC_SDL
+
+
+ REVISION HISTORY
+
+ 1.0 (05/31/2018) initial release
+ 1.01 (08/25/2019) Windows and pthreads port
+*/
+
+#if !defined(CUTE_SYNC_H)
+
+typedef union cute_atomic_int_t cute_atomic_int_t;
+typedef union cute_mutex_t cute_mutex_t;
+typedef union cute_cv_t cute_cv_t;
+typedef struct cute_semaphore_t cute_semaphore_t;
+typedef struct cute_thread_t cute_thread_t;
+typedef unsigned long long cute_thread_id_t;
+typedef int (cute_thread_fn)(void *udata);
+
+/**
+ * Creates an unlocked mutex.
+ */
+cute_mutex_t cute_mutex_create();
+
+/**
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_lock(cute_mutex_t* mutex);
+
+/**
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_unlock(cute_mutex_t* mutex);
+
+/**
+ * Attempts to lock the mutex without blocking. Returns one if lock was acquired,
+ * otherwise returns zero.
+ */
+int cute_trylock(cute_mutex_t* mutex);
+void cute_mutex_destroy(cute_mutex_t* mutex);
+
+/**
+ * Constructs a condition variable, used to sleep or wake threads.
+ */
+cute_cv_t cute_cv_create();
+
+/**
+ * Signals all sleeping threads to wake that are waiting on the condition variable.
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_cv_wake_all(cute_cv_t* cv);
+
+/**
+ * Wakes a single thread that is waiting on the condition variable.
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_cv_wake_one(cute_cv_t* cv);
+
+/**
+ * Places a thread to wait on the condition variable.
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_cv_wait(cute_cv_t* cv, cute_mutex_t* mutex);
+void cute_cv_destroy(cute_cv_t* cv);
+
+/**
+ * Creates a semaphore with an initial internal value of `initial_count`.
+ * The semaphore is returned by value (not as a pointer), so a NULL return is not possible.
+ */
+cute_semaphore_t cute_semaphore_create(int initial_count);
+
+/**
+ * Atomically increments the semaphore's value and then wakes a sleeping thread.
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_semaphore_post(cute_semaphore_t* semaphore);
+
+/**
+ * Non-blocking version of `cute_semaphore_wait`.
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_semaphore_try(cute_semaphore_t* semaphore);
+
+/**
+ * Suspends the calling thread's execution unless the semaphore's value is positive. Will
+ * decrement the value atomically afterwards.
+ * Returns 1 on success, zero otherwise.
+ */
+int cute_semaphore_wait(cute_semaphore_t* semaphore);
+int cute_semaphore_value(cute_semaphore_t* semaphore);
+void cute_semaphore_destroy(cute_semaphore_t* semaphore);
+
+cute_thread_t* cute_thread_create(cute_thread_fn func, const char* name, void* udata);
+
+/**
+ * An optimization, meaning the thread will never have `cute_thread_wait` called on it.
+ * Useful for certain long-lived threads.
+ * It is invalid to call `cute_thread_wait` on a detached thread.
+ * It is invalid to call `cute_thread_wait` on a thread more than once.
+ * Please see this link for a longer description: https://wiki.libsdl.org/SDL_DetachThread
+ */
+void cute_thread_detach(cute_thread_t* thread);
+cute_thread_id_t cute_thread_get_id(cute_thread_t* thread);
+cute_thread_id_t cute_thread_id();
+
+/**
+ * Waits until the thread exits (unless it has already exited), and returns the thread's
+ * return code. Unless the thread was detached, this function must be used, otherwise it
+ * is considered a leak to leave a thread hanging around (even if it finished execution
+ * and returned).
+ */
+int cute_thread_wait(cute_thread_t* thread);
+
+/**
+ * Returns the number of CPU cores on the machine. Can be affected by machine-dependent technology,
+ * such as Intel's hyperthreading.
+ */
+int cute_core_count();
+
+/**
+ * Returns the CPU's L1 cache line size in bytes.
+ */
+int cute_cacheline_size();
+
+/**
+ * Returns the size of the machine's RAM in megabytes.
+ */
+int cute_ram_size();
+
+/**
+ * Atomically adds `addend` at `atomic` and returns the old value at `atomic`.
+ */
+int cute_atomic_add(cute_atomic_int_t* atomic, int addend);
+
+/**
+ * Atomically sets `value` at `atomic` and returns the old value at `atomic`.
+ */
+int cute_atomic_set(cute_atomic_int_t* atomic, int value);
+
+/**
+ * Atomically fetches the value at `atomic`.
+ */
+int cute_atomic_get(cute_atomic_int_t* atomic);
+
+/**
+ * Atomically sets `atomic` to `value` if `expected` equals `atomic`.
+ * Returns 1 if the value was set, 0 otherwise.
+ */
+int cute_atomic_cas(cute_atomic_int_t* atomic, int expected, int value);
+
+/**
+ * Atomically sets `value` at `atomic` and returns the old value at `atomic`.
+ */
+void* cute_atomic_ptr_set(void** atomic, void* value);
+
+/**
+ * Atomically fetches the value at `atomic`.
+ */
+void* cute_atomic_ptr_get(void** atomic);
+
+/**
+ * Atomically sets `atomic` to `value` if `expected` equals `atomic`.
+ * Returns 1 if the value was set, 0 otherwise.
+ */
+int cute_atomic_ptr_cas(void** atomic, void* expected, void* value);
+
+/**
+ * A reader/writer mutual exclusion lock. Allows many simultaneous readers or a single writer.
+ *
+ * The number of readers is capped by `CUTE_RW_LOCK_MAX_READERS` (or in other words, a nearly indefinite
+ * number). Exceeding `CUTE_RW_LOCK_MAX_READERS` simultaneous readers results in undefined behavior.
+ */
+typedef struct cute_rw_lock_t cute_rw_lock_t;
+#define CUTE_RW_LOCK_MAX_READERS (1 << 30)
+
+/**
+ * Constructs an unlocked mutual exclusion read/write lock. The `rw` lock can safely sit
+ * on the stack.
+ */
+cute_rw_lock_t cute_rw_lock_create();
+
+/**
+ * Locks for reading. Many simultaneous readers are allowed.
+ */
+void cute_read_lock(cute_rw_lock_t* rw);
+
+/**
+ * Undoes a single call to `cute_read_lock`.
+ */
+void cute_read_unlock(cute_rw_lock_t* rw);
+
+/**
+ * Locks for writing. When locked for writing, only one writer can be present, and no readers.
+ *
+ * Will wait for active readers to call `cute_read_unlock`, or for active writers to call
+ * `cute_write_unlock`.
+ */
+void cute_write_lock(cute_rw_lock_t* rw);
+
+/**
+ * Undoes a single call to `cute_write_lock`.
+ */
+void cute_write_unlock(cute_rw_lock_t* rw);
+
+/**
+ * Destroys the internal semaphores, and mutex.
+ */
+void cute_rw_lock_destroy(cute_rw_lock_t* rw);
+
+typedef struct cute_threadpool_t cute_threadpool_t;
+
+/**
+ * Constructs a threadpool containing `thread_count`, useful for implementing job/task systems.
+ * `mem_ctx` can be NULL, and is used for custom allocation purposes.
+ *
+ * Returns NULL on error. Will return NULL if `CUTE_SYNC_CACHELINE_SIZE` is less than the actual
+ * cache line size on a given machine. `CUTE_SYNC_CACHELINE_SIZE` defaults to 128 bytes, and can
+ * be overridden by defining CUTE_SYNC_CACHELINE_SIZE before including cute_sync.h
+ *
+ * Makes a modest attempt at memory aligning to avoid false sharing, as an optimization.
+ */
+cute_threadpool_t* cute_threadpool_create(int thread_count, void* mem_ctx);
+
+/**
+ * Atomically adds a single task to the internal task stack (LIFO order). The task is represented
+ * as a function pointer `func`, which does work. The `param` is passed to the `func` when the
+ * task is started.
+ */
+void cute_threadpool_add_task(cute_threadpool_t* pool, void (*func)(void*), void* param);
+
+/**
+ * Wakes internal threads to perform tasks, and waits for all tasks to complete before returning.
+ * The calling thread will help perform available tasks while waiting.
+ */
+void cute_threadpool_kick_and_wait(cute_threadpool_t* pool);
+
+/**
+ * Wakes internal threads to perform tasks and immediately returns.
+ */
+void cute_threadpool_kick(cute_threadpool_t* pool);
+
+/**
+ * Cleans up all resources created from `cute_threadpool_create`.
+ */
+void cute_threadpool_destroy(cute_threadpool_t* pool);
+
+#define CUTE_SYNC_H
+#endif
+
+//--------------------------------------------------------------------------------------------------
+
+#ifndef CUTE_SYNC_TYPE_DEFINITIONS_H
+
+// Storage for one atomic integer. The Windows and POSIX backends operate on `i`; the SDL
+// backend reinterprets the whole union as an SDL_atomic_t. `align` is never read directly
+// (presumably it only forces pointer size/alignment -- confirm).
+union cute_atomic_int_t { void* align; long i; };
+// Mutex storage. The Windows and POSIX backends construct their native mutex in place inside
+// this union (its size is asserted to fit); the SDL backend stores an SDL_mutex* in `align`.
+union cute_mutex_t { void* align; char data[64]; };
+// Condition variable storage, used the same way as cute_mutex_t: native object in place on
+// Windows/POSIX, SDL_cond* in `align` on SDL.
+union cute_cv_t { void* align; char data[64]; };
+// `id` refers to the backend's semaphore object; `count` is seeded with the initial count by
+// every backend's cute_semaphore_create.
+struct cute_semaphore_t { void* id; cute_atomic_int_t count; };
+
+// State of a reader/writer lock (see cute_read_lock / cute_write_lock).
+struct cute_rw_lock_t
+{
+ cute_mutex_t mutex; // Held from cute_write_lock until cute_write_unlock.
+ cute_semaphore_t write_sem; // The writer waits here; the last departing reader posts it.
+ cute_semaphore_t read_sem; // Readers blocked by a writer wait here; cute_write_unlock posts it.
+ cute_atomic_int_t readers; // Reader count; pushed negative while a writer is pending or active.
+ cute_atomic_int_t readers_departing; // Readers a pending writer still has to wait for.
+};
+
+#define CUTE_SYNC_TYPE_DEFINITIONS_H
+#endif
+
+//--------------------------------------------------------------------------------------------------
+
+#if defined(CUTE_SYNC_IMPLEMENTATION)
+#if !defined(CUTE_SYNC_IMPLEMENTATION_ONCE)
+#define CUTE_SYNC_IMPLEMENTATION_ONCE
+
+#if defined(CUTE_SYNC_SDL)
+#elif defined(CUTE_SYNC_WINDOWS)
+ #define WIN32_LEAN_AND_MEAN
+ #ifndef WINVER
+ #define WINVER 0x0600
+ #endif
+ #include
+#elif defined(CUTE_SYNC_POSIX)
+ #include
+ #include
+
+ // Just platforms with unistd.h are supported for now.
+ // So no FreeBSD, OS/2, or other weird platforms.
+ #include // sysconf
+
+ #if defined(__APPLE__)
+ #include // sysctlbyname
+ #endif
+#else
+ #error Please choose a base implementation between CUTE_SYNC_SDL, CUTE_SYNC_WINDOWS and CUTE_SYNC_POSIX.
+#endif
+
+#if !defined(CUTE_SYNC_ALLOC)
+ #include
+ #define CUTE_SYNC_ALLOC(size, ctx) malloc(size)
+ #define CUTE_SYNC_FREE(ptr, ctx) free(ptr)
+#endif
+
+#if !defined(CUTE_SYNC_MEMCPY)
+ #include
+ #define CUTE_SYNC_MEMCPY memcpy
+#endif
+
+#if !defined(CUTE_SYNC_YIELD)
+ #ifdef CUTE_SYNC_WINDOWS
+ #define WIN32_LEAN_AND_MEAN
+ #include // winnt
+ #define CUTE_SYNC_YIELD YieldProcessor
+ #elif defined(CUTE_SYNC_POSIX)
+ #include
+ #define CUTE_SYNC_YIELD sched_yield
+ #else
+ #define CUTE_SYNC_YIELD() // Not implemented by SDL.
+ #endif
+#endif
+
+#if !defined(CUTE_SYNC_ASSERT)
+ #include
+ #define CUTE_SYNC_ASSERT assert
+#endif
+
+#if !defined(CUTE_SYNC_CACHELINE_SIZE)
+ // Sized generously to try and avoid guessing "too low". Too small would incur serious overhead
+ // inside of `cute_threadpool_t` as false sharing would run amok between pooled threads.
+ #define CUTE_SYNC_CACHELINE_SIZE 128
+#endif
+
+// Atomics implementation.
+// Use SDL2's implementation if available, otherwise WIN32 and GCC-like compilers are supported out-of-the-box.
+#ifdef CUTE_SYNC_SDL
+
+// SDL backend: forwards to SDL_AtomicAdd and returns its result.
+int cute_atomic_add(cute_atomic_int_t* atomic, int addend)
+{
+	SDL_atomic_t* target = (SDL_atomic_t*)atomic;
+	return SDL_AtomicAdd(target, addend);
+}
+
+// SDL backend: forwards to SDL_AtomicSet and returns its result.
+int cute_atomic_set(cute_atomic_int_t* atomic, int value)
+{
+	SDL_atomic_t* target = (SDL_atomic_t*)atomic;
+	return SDL_AtomicSet(target, value);
+}
+
+// SDL backend: forwards to SDL_AtomicGet.
+int cute_atomic_get(cute_atomic_int_t* atomic)
+{
+	SDL_atomic_t* target = (SDL_atomic_t*)atomic;
+	return SDL_AtomicGet(target);
+}
+
+// SDL backend: forwards to SDL_AtomicCAS and returns its result.
+int cute_atomic_cas(cute_atomic_int_t* atomic, int expected, int value)
+{
+	SDL_atomic_t* target = (SDL_atomic_t*)atomic;
+	return SDL_AtomicCAS(target, expected, value);
+}
+
+// SDL backend: forwards to SDL_AtomicSetPtr and returns its result.
+void* cute_atomic_ptr_set(void** atomic, void* value)
+{
+	void* previous = SDL_AtomicSetPtr(atomic, value);
+	return previous;
+}
+
+// SDL backend: forwards to SDL_AtomicGetPtr.
+void* cute_atomic_ptr_get(void** atomic)
+{
+	void* current = SDL_AtomicGetPtr(atomic);
+	return current;
+}
+
+// SDL backend: forwards to SDL_AtomicCASPtr and returns its result.
+int cute_atomic_ptr_cas(void** atomic, void* expected, void* value)
+{
+	int swapped = SDL_AtomicCASPtr(atomic, expected, value);
+	return swapped;
+}
+
+#elif defined(CUTE_SYNC_WINDOWS)
+
+// Windows backend: interlocked add; returns the value held before the addition.
+int cute_atomic_add(cute_atomic_int_t* atomic, int addend)
+{
+	LONG previous = _InterlockedExchangeAdd(&atomic->i, (LONG)addend);
+	return (int)previous;
+}
+
+// Windows backend: interlocked exchange; returns the value held before the store.
+int cute_atomic_set(cute_atomic_int_t* atomic, int value)
+{
+	LONG previous = _InterlockedExchange(&atomic->i, value);
+	return (int)previous;
+}
+
+// Windows backend: reads the value through a compare-exchange of 0 against 0,
+// which leaves the stored value as it was.
+int cute_atomic_get(cute_atomic_int_t* atomic)
+{
+	LONG current = _InterlockedCompareExchange(&atomic->i, 0, 0);
+	return (int)current;
+}
+
+// Windows backend: stores `value` into `atomic` only if it currently holds `expected`.
+// Returns 1 if the store happened, 0 otherwise.
+int cute_atomic_cas(cute_atomic_int_t* atomic, int expected, int value)
+{
+	// _InterlockedCompareExchange takes (destination, exchange, comparand) and returns the
+	// value that was in `destination` before the call, so the swap succeeded exactly when
+	// that previous value equals the comparand.
+	LONG previous = _InterlockedCompareExchange(&atomic->i, (LONG)value, (LONG)expected);
+	return previous == (LONG)expected;
+}
+
+// Windows backend: interlocked pointer exchange; returns the pointer held before the store.
+void* cute_atomic_ptr_set(void** atomic, void* value)
+{
+	void* previous = _InterlockedExchangePointer(atomic, value);
+	return previous;
+}
+
+// Windows backend: reads the pointer through a compare-exchange of NULL against NULL,
+// which leaves the stored pointer as it was.
+void* cute_atomic_ptr_get(void** atomic)
+{
+	void* current = _InterlockedCompareExchangePointer(atomic, NULL, NULL);
+	return current;
+}
+
+// Windows backend: stores `value` into `*atomic` only if it currently holds `expected`.
+// Returns 1 if the store happened, 0 otherwise.
+int cute_atomic_ptr_cas(void** atomic, void* expected, void* value)
+{
+	// Argument order is (destination, exchange, comparand); the call returns the previous
+	// content of `*atomic`, which equals the comparand exactly when the swap took place.
+	void* previous = _InterlockedCompareExchangePointer(atomic, value, expected);
+	return previous == expected;
+}
+
+#elif defined(CUTE_SYNC_POSIX)
+
+#if !(defined(__linux__) || defined(__APPLE__) || defined(__ANDROID__))
+# error Unsupported platform found - no atomics implementation available for this compiler.
+# error The section only implements GCC atomics.
+#endif
+
+// POSIX backend: atomic fetch-and-add; returns the value held before the addition.
+int cute_atomic_add(cute_atomic_int_t* atomic, int addend)
+{
+	long previous = __sync_fetch_and_add(&atomic->i, addend);
+	return (int)previous;
+}
+
+// POSIX backend: atomically stores `value` and returns the value held before the store.
+int cute_atomic_set(cute_atomic_int_t* atomic, int value)
+{
+	int previous = (int)__sync_lock_test_and_set(&atomic->i, value);
+	// __sync_lock_test_and_set is only an acquire barrier. A full barrier is issued here
+	// instead of __sync_lock_release, because the latter writes 0 to the variable and
+	// would wipe out the value that was just stored.
+	__sync_synchronize();
+	return previous;
+}
+
+// POSIX backend: reads the value by atomically adding zero to it.
+int cute_atomic_get(cute_atomic_int_t* atomic)
+{
+	long current = __sync_fetch_and_add(&atomic->i, 0);
+	return (int)current;
+}
+
+// POSIX backend: stores `value` into `atomic` only if it currently holds `expected`.
+// Returns 1 if the store happened, 0 otherwise.
+int cute_atomic_cas(cute_atomic_int_t* atomic, int expected, int value)
+{
+	// The bool form reports success directly. The val form returns the previous content,
+	// which must be compared against `expected` (not `value`) to detect success.
+	return __sync_bool_compare_and_swap(&atomic->i, (long)expected, (long)value) ? 1 : 0;
+}
+
+// POSIX backend: atomically stores `value` into `*atomic` and returns the pointer held
+// before the store.
+void* cute_atomic_ptr_set(void** atomic, void* value)
+{
+	void* previous = __sync_lock_test_and_set(atomic, value);
+	// Full barrier instead of __sync_lock_release: the release builtin writes 0 to the
+	// target and would reset the pointer that was just stored to NULL.
+	__sync_synchronize();
+	return previous;
+}
+
+// POSIX backend: reads the pointer by atomically adding NULL to it.
+void* cute_atomic_ptr_get(void** atomic)
+{
+	void* current = __sync_fetch_and_add(atomic, NULL);
+	return current;
+}
+
+// POSIX backend: stores `value` into `*atomic` only if it currently holds `expected`.
+// Returns 1 if the store happened, 0 otherwise.
+int cute_atomic_ptr_cas(void** atomic, void* expected, void* value)
+{
+	// The bool form reports success directly. The val form returns the previous content,
+	// which must be compared against `expected` (not `value`) to detect success.
+	return __sync_bool_compare_and_swap(atomic, expected, value) ? 1 : 0;
+}
+
+#endif // End atomics implementation.
+
+#if defined(CUTE_SYNC_SDL)
+
+// SDL backend: the SDL_mutex* is kept in the `align` member of the wrapper.
+cute_mutex_t cute_mutex_create()
+{
+	cute_mutex_t result;
+	result.align = SDL_CreateMutex();
+	return result;
+}
+
+// Returns 1 when SDL_LockMutex reports 0, otherwise 0.
+int cute_lock(cute_mutex_t* mutex)
+{
+	SDL_mutex* handle = (SDL_mutex*)mutex->align;
+	return SDL_LockMutex(handle) == 0;
+}
+
+// Returns 1 when SDL_UnlockMutex reports 0, otherwise 0.
+int cute_unlock(cute_mutex_t* mutex)
+{
+	SDL_mutex* handle = (SDL_mutex*)mutex->align;
+	return SDL_UnlockMutex(handle) == 0;
+}
+
+// Returns 1 when SDL_TryLockMutex reports 0, otherwise 0.
+int cute_trylock(cute_mutex_t* mutex)
+{
+	SDL_mutex* handle = (SDL_mutex*)mutex->align;
+	return SDL_TryLockMutex(handle) == 0;
+}
+
+// Destroys the SDL mutex stored in the wrapper.
+void cute_mutex_destroy(cute_mutex_t* mutex)
+{
+	SDL_mutex* handle = (SDL_mutex*)mutex->align;
+	SDL_DestroyMutex(handle);
+}
+
+// SDL backend: the SDL_cond* is kept in the `align` member of the wrapper.
+cute_cv_t cute_cv_create()
+{
+	cute_cv_t result;
+	result.align = SDL_CreateCond();
+	return result;
+}
+
+// Returns 1 when SDL_CondBroadcast reports 0, otherwise 0.
+int cute_cv_wake_all(cute_cv_t* cv)
+{
+	SDL_cond* handle = (SDL_cond*)cv->align;
+	return SDL_CondBroadcast(handle) == 0;
+}
+
+// Returns 1 when SDL_CondSignal reports 0, otherwise 0.
+int cute_cv_wake_one(cute_cv_t* cv)
+{
+	SDL_cond* handle = (SDL_cond*)cv->align;
+	return SDL_CondSignal(handle) == 0;
+}
+
+// SDL backend: waits on the condition variable with `mutex` held.
+// Returns 1 when SDL_CondWait reports 0, otherwise 0.
+int cute_cv_wait(cute_cv_t* cv, cute_mutex_t* mutex)
+{
+	// The SDL_cond* lives in `cv->align` (see cute_cv_create); the wrapper's own address
+	// is not an SDL_cond and must not be handed to SDL.
+	return !SDL_CondWait((SDL_cond*)cv->align, (SDL_mutex*)mutex->align);
+}
+
+// Destroys the SDL condition variable stored in the wrapper.
+void cute_cv_destroy(cute_cv_t* cv)
+{
+	SDL_cond* handle = (SDL_cond*)cv->align;
+	SDL_DestroyCond(handle);
+}
+
+// SDL backend: `id` holds the SDL_sem*, `count` is seeded with the initial count.
+cute_semaphore_t cute_semaphore_create(int initial_count)
+{
+	cute_semaphore_t result;
+	result.id = SDL_CreateSemaphore(initial_count);
+	result.count.i = initial_count;
+	return result;
+}
+
+// Returns 1 when SDL_SemPost reports 0, otherwise 0.
+int cute_semaphore_post(cute_semaphore_t* semaphore)
+{
+	SDL_sem* handle = (SDL_sem*)semaphore->id;
+	return SDL_SemPost(handle) == 0;
+}
+
+// Returns 1 when SDL_SemTryWait reports 0, otherwise 0.
+int cute_semaphore_try(cute_semaphore_t* semaphore)
+{
+	SDL_sem* handle = (SDL_sem*)semaphore->id;
+	return SDL_SemTryWait(handle) == 0;
+}
+
+// Returns 1 when SDL_SemWait reports 0, otherwise 0.
+int cute_semaphore_wait(cute_semaphore_t* semaphore)
+{
+	SDL_sem* handle = (SDL_sem*)semaphore->id;
+	return SDL_SemWait(handle) == 0;
+}
+
+// Returns whatever SDL_SemValue reports for the semaphore.
+int cute_semaphore_value(cute_semaphore_t* semaphore)
+{
+	SDL_sem* handle = (SDL_sem*)semaphore->id;
+	return SDL_SemValue(handle);
+}
+
+// Destroys the SDL semaphore stored in `id`.
+void cute_semaphore_destroy(cute_semaphore_t* semaphore)
+{
+	SDL_sem* handle = (SDL_sem*)semaphore->id;
+	SDL_DestroySemaphore(handle);
+}
+
+// SDL backend: the returned handle is the SDL_Thread* itself.
+cute_thread_t* cute_thread_create(cute_thread_fn func, const char* name, void* udata)
+{
+	SDL_Thread* handle = SDL_CreateThread(func, name, udata);
+	return (cute_thread_t*)handle;
+}
+
+// Forwards to SDL_DetachThread.
+void cute_thread_detach(cute_thread_t* thread)
+{
+	SDL_Thread* handle = (SDL_Thread*)thread;
+	SDL_DetachThread(handle);
+}
+
+// Forwards to SDL_GetThreadID.
+cute_thread_id_t cute_thread_get_id(cute_thread_t* thread)
+{
+	SDL_Thread* handle = (SDL_Thread*)thread;
+	return SDL_GetThreadID(handle);
+}
+
+// Forwards to SDL_ThreadID for the calling thread.
+cute_thread_id_t cute_thread_id()
+{
+	return SDL_ThreadID();
+}
+
+// Waits for the thread via SDL_WaitThread and returns the status it wrote.
+int cute_thread_wait(cute_thread_t* thread)
+{
+	int status;
+	SDL_Thread* handle = (SDL_Thread*)thread;
+	SDL_WaitThread(handle, &status);
+	return status;
+}
+
+// Forwards to SDL_GetCPUCount.
+int cute_core_count()
+{
+	int cores = SDL_GetCPUCount();
+	return cores;
+}
+
+// Forwards to SDL_GetCPUCacheLineSize.
+int cute_cacheline_size()
+{
+	int line_size = SDL_GetCPUCacheLineSize();
+	return line_size;
+}
+
+// Forwards to SDL_GetSystemRAM.
+int cute_ram_size()
+{
+	int megabytes = SDL_GetSystemRAM();
+	return megabytes;
+}
+
+#elif defined(CUTE_SYNC_WINDOWS)
+
+// Windows backend: a CRITICAL_SECTION (spin count 2000) is initialized in place inside
+// the wrapper, which is asserted to be large enough to hold it.
+cute_mutex_t cute_mutex_create()
+{
+	cute_mutex_t result;
+	CUTE_SYNC_ASSERT(sizeof(cute_mutex_t) >= sizeof(CRITICAL_SECTION));
+	InitializeCriticalSectionAndSpinCount((CRITICAL_SECTION*)&result, 2000);
+	return result;
+}
+
+// Enters the critical section; always reports success.
+int cute_lock(cute_mutex_t* mutex)
+{
+	CRITICAL_SECTION* section = (CRITICAL_SECTION*)mutex;
+	EnterCriticalSection(section);
+	return 1;
+}
+
+// Leaves the critical section; always reports success.
+int cute_unlock(cute_mutex_t* mutex)
+{
+	CRITICAL_SECTION* section = (CRITICAL_SECTION*)mutex;
+	LeaveCriticalSection(section);
+	return 1;
+}
+
+// Windows backend: attempts to enter the critical section without blocking.
+// Returns 1 if the lock was acquired, 0 otherwise.
+int cute_trylock(cute_mutex_t* mutex)
+{
+	// TryEnterCriticalSection returns nonzero on success, so it must not be negated.
+	return TryEnterCriticalSection((CRITICAL_SECTION*)mutex) != 0;
+}
+
+// Deletes the critical section stored in the wrapper.
+void cute_mutex_destroy(cute_mutex_t* mutex)
+{
+	CRITICAL_SECTION* section = (CRITICAL_SECTION*)mutex;
+	DeleteCriticalSection(section);
+}
+
+// Windows backend: a CONDITION_VARIABLE is initialized in place inside the wrapper,
+// which is asserted to be large enough to hold it.
+cute_cv_t cute_cv_create()
+{
+	cute_cv_t result;
+	CUTE_SYNC_ASSERT(sizeof(cute_cv_t) >= sizeof(CONDITION_VARIABLE));
+	InitializeConditionVariable((CONDITION_VARIABLE*)&result);
+	return result;
+}
+
+// Wakes every waiter; always reports success.
+int cute_cv_wake_all(cute_cv_t* cv)
+{
+	CONDITION_VARIABLE* native = (CONDITION_VARIABLE*)cv;
+	WakeAllConditionVariable(native);
+	return 1;
+}
+
+// Wakes one waiter; always reports success.
+int cute_cv_wake_one(cute_cv_t* cv)
+{
+	CONDITION_VARIABLE* native = (CONDITION_VARIABLE*)cv;
+	WakeConditionVariable(native);
+	return 1;
+}
+
+// Sleeps on the condition variable with no timeout. Returns 1 when
+// SleepConditionVariableCS reports nonzero, otherwise 0.
+int cute_cv_wait(cute_cv_t* cv, cute_mutex_t* mutex)
+{
+	CONDITION_VARIABLE* native = (CONDITION_VARIABLE*)cv;
+	CRITICAL_SECTION* section = (CRITICAL_SECTION*)mutex;
+	return SleepConditionVariableCS(native, section, INFINITE) != 0;
+}
+
+void cute_cv_destroy(cute_cv_t* cv)
+{
+	// Intentionally empty: Windows offers no call to delete a condition variable.
+	// https://stackoverflow.com/questions/28975958/why-does-windows-have-no-deleteconditionvariable-function-to-go-together-with
+}
+
+// Windows backend: `id` holds the semaphore HANDLE, `count` mirrors the initial count.
+cute_semaphore_t cute_semaphore_create(int initial_count)
+{
+	cute_semaphore_t result;
+	result.id = CreateSemaphoreA(NULL, (LONG)initial_count, LONG_MAX, NULL);
+	result.count.i = initial_count;
+	return result;
+}
+
+// Bumps the mirrored count, then releases the semaphore by one. If the release fails
+// the count bump is undone and 0 is returned; otherwise 1.
+int cute_semaphore_post(cute_semaphore_t* semaphore)
+{
+	_InterlockedIncrement(&semaphore->count.i);
+	if (ReleaseSemaphore(semaphore->id, 1, NULL) != FALSE) {
+		return 1;
+	}
+	_InterlockedDecrement(&semaphore->count.i);
+	return 0;
+}
+
+// Waits on the semaphore handle for at most `milliseconds`.
+// Returns 1 if the semaphore was acquired, 0 on timeout or failure.
+static int s_wait(cute_semaphore_t* semaphore, DWORD milliseconds)
+{
+	if (WaitForSingleObjectEx(semaphore->id, milliseconds, FALSE) != WAIT_OBJECT_0) {
+		return 0;
+	}
+	// cute_semaphore_post increments the mirrored count, so a successful acquire has to
+	// decrement it; otherwise cute_semaphore_value only ever grows.
+	_InterlockedDecrement(&semaphore->count.i);
+	return 1;
+}
+
+// Non-blocking acquire: waits with a zero timeout.
+int cute_semaphore_try(cute_semaphore_t* semaphore)
+{
+	int acquired = s_wait(semaphore, 0);
+	return acquired;
+}
+
+// Blocking acquire: waits with no timeout.
+int cute_semaphore_wait(cute_semaphore_t* semaphore)
+{
+	int acquired = s_wait(semaphore, INFINITE);
+	return acquired;
+}
+
+// Reports the mirrored count kept next to the handle.
+int cute_semaphore_value(cute_semaphore_t* semaphore)
+{
+	int current = cute_atomic_get(&semaphore->count);
+	return current;
+}
+
+// Windows backend: closes the semaphore handle stored in `id`.
+void cute_semaphore_destroy(cute_semaphore_t* semaphore)
+{
+	// `id` already is the HANDLE returned by CreateSemaphoreA; passing its address
+	// would hand CloseHandle an invalid handle and leak the real one.
+	CloseHandle((HANDLE)semaphore->id);
+}
+
+// Windows backend: starts a thread with CreateThread; the returned handle doubles as the
+// cute_thread_t*. `name` is ignored.
+cute_thread_t* cute_thread_create(cute_thread_fn fn, const char* name, void* udata)
+{
+	DWORD thread_id_unused;
+	HANDLE handle;
+	(void)name;
+	handle = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)fn, udata, 0, &thread_id_unused);
+	return (cute_thread_t*)handle;
+}
+
+// Detaching is implemented by closing the thread handle.
+void cute_thread_detach(cute_thread_t* thread)
+{
+	HANDLE handle = (HANDLE)thread;
+	CloseHandle(handle);
+}
+
+// Forwards to GetThreadId for the given handle.
+cute_thread_id_t cute_thread_get_id(cute_thread_t* thread)
+{
+	HANDLE handle = (HANDLE)thread;
+	return GetThreadId(handle);
+}
+
+// Forwards to GetCurrentThreadId.
+cute_thread_id_t cute_thread_id()
+{
+	DWORD current = GetCurrentThreadId();
+	return current;
+}
+
+// Windows backend: blocks until the thread behind `thread` is signaled, then closes its handle.
+// NOTE(review): this always returns 1, while the header comment for cute_thread_wait promises
+// the thread's return code -- confirm which behavior callers rely on before changing it.
+int cute_thread_wait(cute_thread_t* thread)
+{
+ WaitForSingleObject((HANDLE)thread, INFINITE);
+ CloseHandle((HANDLE)thread);
+ return 1;
+}
+
+// Windows backend: reports dwNumberOfProcessors from GetSystemInfo.
+int cute_core_count()
+{
+	SYSTEM_INFO system_info;
+	GetSystemInfo(&system_info);
+	DWORD processors = system_info.dwNumberOfProcessors;
+	return (int)processors;
+}
+
+// Windows backend: returns the line size of the first level-1 cache reported by
+// GetLogicalProcessorInformation, or a guess of 128 when it cannot be determined.
+int cute_cacheline_size()
+{
+	SYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer[256];
+	DWORD buffer_size = 0; // Must start at 0: the size query reads it as the buffer length.
+
+	// Size query: with no buffer the call fails and writes the required byte count.
+	GetLogicalProcessorInformation(0, &buffer_size);
+	if (buffer_size == 0 || buffer_size > sizeof(buffer)) {
+		// Either the query failed or the machine reports more entries than fit in
+		// the fixed buffer (supporting that would require a heap allocation).
+		return 128;
+	}
+
+	if (!GetLogicalProcessorInformation(buffer, &buffer_size)) {
+		// Do not scan a buffer the call failed to fill.
+		return 128;
+	}
+
+	DWORD buffer_count = buffer_size / sizeof(SYSTEM_LOGICAL_PROCESSOR_INFORMATION);
+	for (DWORD i = 0; i < buffer_count; ++i) {
+		if (buffer[i].Relationship == RelationCache && buffer[i].Cache.Level == 1) {
+			return (int)buffer[i].Cache.LineSize;
+		}
+	}
+
+	// Just guess...
+	return 128;
+}
+
+// Windows backend: total physical memory from GlobalMemoryStatusEx, in megabytes.
+int cute_ram_size()
+{
+	MEMORYSTATUSEX memory;
+	memory.dwLength = sizeof(memory);
+	GlobalMemoryStatusEx(&memory);
+	DWORDLONG megabytes = memory.ullTotalPhys / (1024 * 1024);
+	return (int)megabytes;
+}
+
+#elif defined(CUTE_SYNC_POSIX)
+
+// POSIX backend: a pthread_mutex_t with default attributes is initialized in place
+// inside the wrapper, which is asserted to be large enough to hold it.
+cute_mutex_t cute_mutex_create()
+{
+	cute_mutex_t result;
+	CUTE_SYNC_ASSERT(sizeof(cute_mutex_t) >= sizeof(pthread_mutex_t));
+	pthread_mutex_init((pthread_mutex_t*)&result, NULL);
+	return result;
+}
+
+// POSIX backend: locks the pthread mutex stored in the wrapper.
+// Returns 1 on success, 0 if pthread_mutex_lock reports an error.
+int cute_lock(cute_mutex_t* mutex)
+{
+	// pthread functions return 0 on success and an error number otherwise; surface
+	// that instead of unconditionally claiming success.
+	return pthread_mutex_lock((pthread_mutex_t*)mutex) == 0;
+}
+
+// POSIX backend: unlocks the pthread mutex stored in the wrapper.
+// Returns 1 on success, 0 if pthread_mutex_unlock reports an error.
+int cute_unlock(cute_mutex_t* mutex)
+{
+	return pthread_mutex_unlock((pthread_mutex_t*)mutex) == 0;
+}
+
+// Returns 1 when pthread_mutex_trylock reports 0, otherwise 0.
+int cute_trylock(cute_mutex_t* mutex)
+{
+	pthread_mutex_t* native = (pthread_mutex_t*)mutex;
+	return pthread_mutex_trylock(native) == 0;
+}
+
+// Destroys the pthread mutex stored in the wrapper.
+void cute_mutex_destroy(cute_mutex_t* mutex)
+{
+	pthread_mutex_t* native = (pthread_mutex_t*)mutex;
+	pthread_mutex_destroy(native);
+}
+
+// POSIX backend: a pthread_cond_t with default attributes is initialized in place
+// inside the wrapper, which is asserted to be large enough to hold it.
+cute_cv_t cute_cv_create()
+{
+	cute_cv_t result;
+	CUTE_SYNC_ASSERT(sizeof(cute_cv_t) >= sizeof(pthread_cond_t));
+	pthread_cond_init((pthread_cond_t*)&result, NULL);
+	return result;
+}
+
+// POSIX backend: wakes every thread waiting on the condition variable.
+// Returns 1 on success, 0 if pthread_cond_broadcast reports an error.
+int cute_cv_wake_all(cute_cv_t* cv)
+{
+	return pthread_cond_broadcast((pthread_cond_t*)cv) == 0;
+}
+
+// POSIX backend: wakes one thread waiting on the condition variable.
+// Returns 1 on success, 0 if pthread_cond_signal reports an error.
+int cute_cv_wake_one(cute_cv_t* cv)
+{
+	return pthread_cond_signal((pthread_cond_t*)cv) == 0;
+}
+
+// Returns 1 when pthread_cond_wait reports 0, otherwise 0.
+int cute_cv_wait(cute_cv_t* cv, cute_mutex_t* mutex)
+{
+	pthread_cond_t* native_cv = (pthread_cond_t*)cv;
+	pthread_mutex_t* native_mutex = (pthread_mutex_t*)mutex;
+	return pthread_cond_wait(native_cv, native_mutex) == 0;
+}
+
+// Destroys the pthread condition variable stored in the wrapper.
+void cute_cv_destroy(cute_cv_t* cv)
+{
+	pthread_cond_t* native_cv = (pthread_cond_t*)cv;
+	pthread_cond_destroy(native_cv);
+}
+
+#if !defined(__APPLE__)
+
+// POSIX (non-Apple) backend: the sem_t is heap-allocated and `id` points at it.
+// A sem_t does not fit in the pointer-sized `id` field, and every other function in
+// this section treats `id` as a pointer to the semaphore, so it cannot be initialized
+// in place. If allocation or sem_init fails, `id` is left NULL.
+cute_semaphore_t cute_semaphore_create(int initial_count)
+{
+	cute_semaphore_t semaphore;
+	sem_t* sem = (sem_t*)CUTE_SYNC_ALLOC(sizeof(sem_t), NULL);
+	if (sem && sem_init(sem, 0, (unsigned)initial_count) != 0) {
+		CUTE_SYNC_FREE(sem, NULL);
+		sem = NULL;
+	}
+	semaphore.id = (void*)sem;
+	semaphore.count.i = initial_count;
+	return semaphore;
+}
+
+// Returns 1 when sem_post succeeds, 0 otherwise.
+int cute_semaphore_post(cute_semaphore_t* semaphore)
+{
+	return !sem_post((sem_t*)semaphore->id);
+}
+
+// Non-blocking acquire. Returns 1 when sem_trywait succeeds, 0 otherwise.
+int cute_semaphore_try(cute_semaphore_t* semaphore)
+{
+	return !sem_trywait((sem_t*)semaphore->id);
+}
+
+// Blocking acquire. Returns 1 when sem_wait succeeds, 0 otherwise.
+int cute_semaphore_wait(cute_semaphore_t* semaphore)
+{
+	// The POSIX blocking call is sem_wait; there is no sem_try.
+	return !sem_wait((sem_t*)semaphore->id);
+}
+
+// Returns the value reported by sem_getvalue (0 if the call does not write a result).
+int cute_semaphore_value(cute_semaphore_t* semaphore)
+{
+	int result = 0;
+	sem_getvalue((sem_t*)semaphore->id, &result);
+	return result;
+}
+
+// Destroys the semaphore and releases the storage allocated by cute_semaphore_create.
+void cute_semaphore_destroy(cute_semaphore_t* semaphore)
+{
+	sem_t* sem = (sem_t*)semaphore->id;
+	if (!sem) return;
+	sem_destroy(sem);
+	CUTE_SYNC_FREE(sem, NULL);
+	semaphore->id = NULL;
+}
+
+#elif defined(__APPLE__)
+
+// Because Apple sucks and deprecated posix semaphores we must make our own...
+
+// Hand-rolled counting semaphore used on Apple platforms: a counter guarded by a mutex,
+// plus a condition variable that blocked waiters sleep on.
+typedef struct cute_apple_sem_t
+{
+	int count;          // Current semaphore value.
+	int waiting_count;  // Bookkeeping for threads blocked in cute_semaphore_wait.
+	cute_mutex_t lock;  // Guards `count` and `waiting_count`.
+	cute_cv_t cv;       // Waiters sleep here until a post arrives.
+} cute_apple_sem_t;
+
+// Apple backend: allocates the semaphore state and stores it in `id`.
+// If the allocation fails, `id` is left NULL.
+cute_semaphore_t cute_semaphore_create(int initial_count)
+{
+	cute_semaphore_t semaphore;
+	semaphore.id = NULL;
+	semaphore.count.i = initial_count;
+
+	cute_apple_sem_t* apple_sem = (cute_apple_sem_t*)CUTE_SYNC_ALLOC(sizeof(cute_apple_sem_t), NULL);
+	if (!apple_sem) {
+		// Do not write through a failed allocation.
+		return semaphore;
+	}
+
+	apple_sem->count = initial_count;
+	apple_sem->waiting_count = 0;
+	apple_sem->lock = cute_mutex_create();
+	apple_sem->cv = cute_cv_create();
+	semaphore.id = (void*)apple_sem;
+	return semaphore;
+}
+
+// Apple backend: under the lock, signals one waiter if `waiting_count` is positive and
+// then raises the count by one. Always returns 1.
+int cute_semaphore_post(cute_semaphore_t* semaphore)
+{
+	cute_apple_sem_t* sem = (cute_apple_sem_t*)semaphore->id;
+
+	cute_lock(&sem->lock);
+	if (sem->waiting_count > 0)
+		cute_cv_wake_one(&sem->cv);
+	++sem->count;
+	cute_unlock(&sem->lock);
+
+	return 1;
+}
+
+// Apple backend: takes one unit if the count is positive, without blocking.
+// Returns 1 if a unit was taken, 0 otherwise.
+int cute_semaphore_try(cute_semaphore_t* semaphore)
+{
+	cute_apple_sem_t* sem = (cute_apple_sem_t*)semaphore->id;
+	int acquired;
+
+	cute_lock(&sem->lock);
+	acquired = sem->count > 0;
+	if (acquired)
+		--sem->count;
+	cute_unlock(&sem->lock);
+
+	return acquired;
+}
+
+// Apple backend: blocks until the count is positive, then takes one unit.
+// Returns 1 on success, 0 if waiting on the condition variable failed.
+int cute_semaphore_wait(cute_semaphore_t* semaphore)
+{
+	cute_apple_sem_t* apple_sem = (cute_apple_sem_t*)semaphore->id;
+	int result = 1;
+	cute_lock(&apple_sem->lock);
+	// Register as a waiter before sleeping. cute_semaphore_post only signals the
+	// condition variable when waiting_count is positive, so without this increment a
+	// blocked waiter is never woken and the counter drifts negative.
+	apple_sem->waiting_count += 1;
+	while (apple_sem->count == 0 && result) {
+		result = cute_cv_wait(&apple_sem->cv, &apple_sem->lock);
+	}
+	apple_sem->waiting_count -= 1;
+	if (result) {
+		apple_sem->count -= 1;
+	}
+	cute_unlock(&apple_sem->lock);
+	return result;
+}
+
+// Apple backend: reads the current count while holding the lock.
+int cute_semaphore_value(cute_semaphore_t* semaphore)
+{
+	cute_apple_sem_t* sem = (cute_apple_sem_t*)semaphore->id;
+
+	cute_lock(&sem->lock);
+	int snapshot = sem->count;
+	cute_unlock(&sem->lock);
+
+	return snapshot;
+}
+
+// Apple backend: releases any registered waiters, then destroys the condition variable
+// and mutex and frees the semaphore state.
+void cute_semaphore_destroy(cute_semaphore_t* semaphore)
+{
+	cute_apple_sem_t* apple_sem = (cute_apple_sem_t*)semaphore->id;
+
+	cute_lock(&apple_sem->lock);
+	if (apple_sem->waiting_count > 0) {
+		// Waiters loop while `count == 0`; give them a non-zero count so that a wake-up
+		// actually lets them leave instead of going straight back to sleep.
+		apple_sem->count = 0x7FFFFFFF;
+	}
+	// `waiting_count` is guarded by the lock, so it is only inspected while holding it.
+	// The lock is dropped around the yield so woken waiters can make progress.
+	while (apple_sem->waiting_count > 0) {
+		cute_cv_wake_all(&apple_sem->cv);
+		cute_unlock(&apple_sem->lock);
+		CUTE_SYNC_YIELD();
+		cute_lock(&apple_sem->lock);
+	}
+	cute_unlock(&apple_sem->lock);
+
+	cute_cv_destroy(&apple_sem->cv);
+	cute_mutex_destroy(&apple_sem->lock);
+	CUTE_SYNC_FREE(apple_sem, NULL);
+}
+
+#endif
+
+// POSIX backend: starts a thread running `fn(udata)`. The pthread_t value itself is
+// returned, cast to cute_thread_t*. Returns NULL if the thread could not be created.
+// `name` is applied with pthread_setname_np on non-Apple platforms and ignored on Apple.
+// NOTE(review): `fn` returns int but is invoked through a `void* (*)(void*)` pointer;
+// this relies on the platform calling convention -- confirm it is acceptable.
+cute_thread_t* cute_thread_create(cute_thread_fn fn, const char* name, void* udata)
+{
+	pthread_t thread;
+	if (pthread_create(&thread, NULL, (void* (*)(void*))fn, udata) != 0) {
+		// `thread` is not a valid handle when creation fails; do not hand it out.
+		return NULL;
+	}
+#if !defined(__APPLE__)
+	if (name) pthread_setname_np(thread, name);
+#else
+	(void)name;
+#endif
+	return (cute_thread_t*)thread;
+}
+
+// Forwards to pthread_detach; the cute_thread_t* is the pthread_t value itself.
+void cute_thread_detach(cute_thread_t* thread)
+{
+	pthread_t native = (pthread_t)thread;
+	pthread_detach(native);
+}
+
+// The thread id is the handle value reinterpreted as an integer.
+cute_thread_id_t cute_thread_get_id(cute_thread_t* thread)
+{
+	cute_thread_id_t id = (cute_thread_id_t)thread;
+	return id;
+}
+
+// Id of the calling thread, derived from pthread_self.
+cute_thread_id_t cute_thread_id()
+{
+	pthread_t self = pthread_self();
+	return (cute_thread_id_t)self;
+}
+
+// POSIX backend: joins the thread; the thread's exit value is not collected (NULL is passed
+// to pthread_join).
+// NOTE(review): this always returns 1, while the header comment for cute_thread_wait promises
+// the thread's return code -- confirm which behavior callers rely on before changing it.
+int cute_thread_wait(cute_thread_t* thread)
+{
+ pthread_join((pthread_t)thread, NULL);
+ return 1;
+}
+
+// POSIX backend: number of processors currently online, as reported by sysconf.
+int cute_core_count()
+{
+	long online = sysconf(_SC_NPROCESSORS_ONLN);
+	return (int)online;
+}
+
+// POSIX backend: cache line size in bytes, from sysctl "hw.cachelinesize" on Apple and
+// from sysconf(_SC_LEVEL1_DCACHE_LINESIZE) elsewhere. When the query fails or reports a
+// non-positive size, falls back to a guess of 128, like the Windows backend.
+int cute_cacheline_size()
+{
+#if defined(__APPLE__)
+	size_t sz = 0; // Stays 0 if sysctlbyname does not fill it in.
+	size_t szsz = sizeof(sz);
+	if (sysctlbyname("hw.cachelinesize", &sz, &szsz, 0, 0) != 0 || sz == 0) {
+		return 128;
+	}
+	return (int)sz;
+#else
+	long sz = sysconf(_SC_LEVEL1_DCACHE_LINESIZE);
+	if (sz <= 0) {
+		return 128;
+	}
+	return (int)sz;
+#endif
+}
+
+// POSIX backend: physical RAM in megabytes, computed from the page count and page size
+// reported by sysconf. Returns 0 if either query fails.
+int cute_ram_size()
+{
+	// Widen before multiplying: with a 32-bit `long` the page count times the page size
+	// overflows on machines with 2 GB of RAM or more.
+	long long pages = sysconf(_SC_PHYS_PAGES);
+	long long page_size = sysconf(_SC_PAGESIZE);
+	if (pages <= 0 || page_size <= 0) {
+		return 0;
+	}
+	return (int)(pages * page_size / (1024 * 1024));
+}
+
+#else
+
+ #error Please choose a base implementation between CUTE_SYNC_SDL, CUTE_SYNC_WINDOWS and CUTE_SYNC_POSIX.
+
+#endif
+
+// Builds an unlocked reader/writer lock: fresh mutex, both semaphores at zero, no readers.
+cute_rw_lock_t cute_rw_lock_create()
+{
+	cute_rw_lock_t lock;
+	lock.mutex = cute_mutex_create();
+	lock.write_sem = cute_semaphore_create(0);
+	lock.read_sem = cute_semaphore_create(0);
+	lock.readers.i = 0;
+	lock.readers_departing.i = 0;
+	return lock;
+}
+
+// Registers the caller as a reader. If the reader count was negative beforehand, a
+// writer holds or is waiting for the lock, so the caller sleeps on `read_sem`.
+void cute_read_lock(cute_rw_lock_t* rw)
+{
+	int before = cute_atomic_add(&rw->readers, 1);
+	if (before < 0) {
+		cute_semaphore_wait(&rw->read_sem);
+	}
+}
+
+// Unregisters the caller as a reader. When a writer is pending (negative count), the
+// reader that brings `readers_departing` to zero wakes that writer.
+void cute_read_unlock(cute_rw_lock_t* rw)
+{
+	int before = cute_atomic_add(&rw->readers, -1);
+	if (before >= 0) {
+		return; // No writer is pending.
+	}
+
+	int still_departing = cute_atomic_add(&rw->readers_departing, -1) - 1;
+	if (still_departing == 0) {
+		cute_semaphore_post(&rw->write_sem);
+	}
+}
+
+// Acquires the lock for writing: takes the mutex, drives the reader count negative so
+// that new readers block, then waits for the readers that were active at that moment.
+void cute_write_lock(cute_rw_lock_t* rw)
+{
+	cute_lock(&rw->mutex);
+
+	// Value before the flip == number of readers that were active.
+	int active = cute_atomic_add(&rw->readers, -CUTE_RW_LOCK_MAX_READERS);
+
+	// Hand that number to the departing counter; a non-zero result means some of them
+	// have yet to leave.
+	int outstanding = cute_atomic_add(&rw->readers_departing, active) + active;
+	if (outstanding != 0) {
+		cute_semaphore_wait(&rw->write_sem);
+	}
+}
+
+// Releases the write lock: restores the reader count to non-negative, posts `read_sem`
+// once for every reader that queued up meanwhile, then releases the mutex.
+void cute_write_unlock(cute_rw_lock_t* rw)
+{
+	int waiting = cute_atomic_add(&rw->readers, CUTE_RW_LOCK_MAX_READERS) + CUTE_RW_LOCK_MAX_READERS;
+
+	for (int remaining = waiting; remaining > 0; --remaining) {
+		cute_semaphore_post(&rw->read_sem);
+	}
+
+	cute_unlock(&rw->mutex);
+}
+
+// Tears down the mutex and both semaphores owned by the lock.
+void cute_rw_lock_destroy(cute_rw_lock_t* rw)
+{
+	cute_mutex_t* writer_mutex = &rw->mutex;
+	cute_semaphore_t* writer_sem = &rw->write_sem;
+	cute_semaphore_t* reader_sem = &rw->read_sem;
+
+	cute_mutex_destroy(writer_mutex);
+	cute_semaphore_destroy(writer_sem);
+	cute_semaphore_destroy(reader_sem);
+}
+
+// Rounds address X up to the next multiple of Y (Y must be a power of two).
+// Both arguments are fully parenthesized so that expression arguments expand correctly.
+#define CUTE_SYNC_ALIGN_PTR(X, Y) ((((size_t)(X)) + ((size_t)(Y) - 1)) & ~((size_t)(Y) - 1))
+
+// Allocates `size` bytes whose address is a multiple of `alignment`.
+// `alignment` must be a power of two no larger than 128. Returns 0 if the underlying
+// allocation fails. The result must be released with cute_free_aligned.
+//
+// Layout: `alignment` extra bytes are requested, the returned pointer is the first
+// aligned address strictly after the raw block start, and the byte just before it
+// records the distance back to the raw block (1..alignment).
+static void* cute_malloc_aligned(size_t size, int alignment, void* mem_ctx)
+{
+	(void)mem_ctx;
+	int is_power_of_2 = alignment && !(alignment & (alignment - 1));
+	CUTE_SYNC_ASSERT(is_power_of_2);
+	// The back-distance is stored in a single byte and can be as large as `alignment`,
+	// so anything above 128 (i.e. 256 and up) could not be recorded.
+	CUTE_SYNC_ASSERT(alignment <= 128);
+	unsigned char* raw = (unsigned char*)CUTE_SYNC_ALLOC(size + alignment, mem_ctx);
+	if (!raw) return 0;
+	// Byte-pointer arithmetic instead of arithmetic on void*, which is a compiler extension.
+	unsigned char* aligned = (unsigned char*)CUTE_SYNC_ALIGN_PTR(raw + 1, alignment);
+	aligned[-1] = (unsigned char)(aligned - raw);
+	return aligned;
+}
+
+// Releases a block obtained from cute_malloc_aligned. The byte just before `p` holds the
+// distance back to the start of the underlying allocation. A null `p` is ignored.
+static void cute_free_aligned(void* p, void* mem_ctx)
+{
+	(void)mem_ctx;
+	if (p) {
+		unsigned char* aligned = (unsigned char*)p;
+		size_t distance = (size_t)aligned[-1];
+		CUTE_SYNC_FREE(aligned - distance, mem_ctx);
+	}
+}
+
+// One unit of work for the thread pool: a function and the argument it is called with.
+typedef struct cute_task_t
+{
+ void (*do_work)(void*);
+ void* param;
+} cute_task_t;
+
+// Thread pool state, created by cute_threadpool_create.
+typedef struct cute_threadpool_t
+{
+ int task_capacity; // Number of slots allocated in `tasks`.
+ int task_count; // Number of queued tasks; tasks are pushed and popped at this end.
+ cute_task_t* tasks; // Task array, allocated with cute_malloc_aligned.
+ cute_mutex_t task_mutex; // Held while `tasks`, `task_count` or `task_capacity` is touched.
+
+ int thread_count; // Number of worker threads.
+ cute_thread_t** threads; // Worker handles, allocated with cute_malloc_aligned.
+
+ cute_atomic_int_t running; // Workers keep looping while this reads non-zero.
+ cute_mutex_t sem_mutex; // Created alongside the pool; NOTE(review): no use is visible in this part of the file.
+ cute_semaphore_t semaphore; // Workers wait on this after each pass through their loop.
+ void* mem_ctx; // User allocation context handed to the allocation macros.
+} cute_threadpool_t;
+
+// Removes the most recently added task from the pool, if there is one, and copies it to
+// `*task`. Returns 1 when a task was popped, 0 when the pool had none.
+int cute_try_pop_task_internal(cute_threadpool_t* pool, cute_task_t* task)
+{
+	int popped = 0;
+
+	cute_lock(&pool->task_mutex);
+	if (pool->task_count != 0) {
+		pool->task_count -= 1;
+		*task = pool->tasks[pool->task_count];
+		popped = 1;
+	}
+	cute_unlock(&pool->task_mutex);
+
+	return popped;
+}
+
+// Thread entry for pool workers; `udata` is the cute_threadpool_t*. Each pass runs at most one task, then blocks on the semaphore. Returns 0.
+int cute_worker_thread_internal(void* udata)
+{
+	cute_threadpool_t* owner = (cute_threadpool_t*)udata;
+	cute_task_t job;
+
+	// `running` is only re-read after the wait returns, which is why cute_threadpool_destroy posts once per worker after clearing it.
+	while (cute_atomic_get(&owner->running) != 0) {
+		if (cute_try_pop_task_internal(owner, &job) != 0) job.do_work(job.param);
+		cute_semaphore_wait(&owner->semaphore);
+	}
+	return 0;
+}
+// Creates a pool with `thread_count` workers. Returns 0 if thread_count is negative, the cacheline check fails, or an allocation fails.
+cute_threadpool_t* cute_threadpool_create(int thread_count, void* mem_ctx)
+{
+	if (thread_count < 0 || CUTE_SYNC_CACHELINE_SIZE < cute_cacheline_size()) return 0;
+	cute_threadpool_t* pool = (cute_threadpool_t*)CUTE_SYNC_ALLOC(sizeof(cute_threadpool_t), mem_ctx);
+	if (!pool) return 0; // Used to be dereferenced without a check.
+	pool->task_capacity = 64;
+	pool->task_count = 0;
+	pool->tasks = (cute_task_t*)cute_malloc_aligned(sizeof(cute_task_t) * pool->task_capacity, CUTE_SYNC_CACHELINE_SIZE, mem_ctx);
+	pool->thread_count = thread_count;
+	pool->threads = pool->tasks ? (cute_thread_t**)cute_malloc_aligned(sizeof(cute_thread_t*) * thread_count, CUTE_SYNC_CACHELINE_SIZE, mem_ctx) : 0;
+	pool->mem_ctx = mem_ctx;
+	if (!pool->threads) { // One of the two arrays is missing: undo everything before any mutex, semaphore or thread exists.
+		cute_free_aligned(pool->tasks, mem_ctx);
+		CUTE_SYNC_FREE(pool, mem_ctx);
+		return 0;
+	}
+	pool->task_mutex = cute_mutex_create();
+	cute_atomic_set(&pool->running, 1);
+	pool->sem_mutex = cute_mutex_create();
+	pool->semaphore = cute_semaphore_create(0);
+	for (int i = 0; i < thread_count; ++i) pool->threads[i] = cute_thread_create(cute_worker_thread_internal, 0, pool);
+	return pool;
+}
+// Appends func(param) to the queue under task_mutex; no worker is woken here. A full array is doubled first, and if that fails the task is not queued.
+void cute_threadpool_add_task(cute_threadpool_t* pool, void (*func)(void*), void* param)
+{
+	cute_lock(&pool->task_mutex);
+	if (pool->task_count == pool->task_capacity) {
+		cute_task_t* new_tasks = (cute_task_t*)cute_malloc_aligned(sizeof(cute_task_t) * pool->task_capacity * 2, CUTE_SYNC_CACHELINE_SIZE, pool->mem_ctx);
+		CUTE_SYNC_ASSERT(new_tasks != 0);
+		if (!new_tasks) { // Used to fall through into a memcpy to a null pointer; the void return leaves no way to report it.
+			cute_unlock(&pool->task_mutex);
+			return;
+		}
+		CUTE_SYNC_MEMCPY(new_tasks, pool->tasks, sizeof(cute_task_t) * pool->task_count);
+		cute_free_aligned(pool->tasks, pool->mem_ctx);
+		pool->task_capacity *= 2;
+		pool->tasks = new_tasks;
+	}
+	pool->tasks[pool->task_count].do_work = func;
+	pool->tasks[pool->task_count].param = param;
+	pool->task_count += 1;
+	cute_unlock(&pool->task_mutex);
+}
+// Kicks the workers, then also runs queued tasks on the calling thread until task_count reads zero. A task a worker already popped may still be running on return.
+void cute_threadpool_kick_and_wait(cute_threadpool_t* pool)
+{
+	cute_task_t job;
+	cute_threadpool_kick(pool);
+	while (pool->task_count != 0) { // Read without taking task_mutex.
+		if (cute_try_pop_task_internal(pool, &job) != 0) {
+			cute_semaphore_try(&pool->semaphore); // Presumably takes back one wake-up, since this task no longer needs a worker.
+			job.do_work(job.param);
+		}
+		CUTE_SYNC_YIELD();
+	}
+}
+
+// Posts the semaphore min(task_count, thread_count) times, one wake-up per task up to the number of workers; no posts when nothing is queued.
+void cute_threadpool_kick(cute_threadpool_t* pool)
+{
+	int wakeups = pool->task_count; // Read without taking task_mutex.
+	if (wakeups > pool->thread_count) wakeups = pool->thread_count;
+	for (; wakeups > 0; --wakeups) {
+		cute_semaphore_post(&pool->semaphore);
+	}
+}
+
+// Stops and waits for every worker, destroys the pool's mutexes and semaphore, then frees its memory. A null `pool` is ignored; queued tasks are not run.
+void cute_threadpool_destroy(cute_threadpool_t* pool)
+{
+	if (!pool) return;
+	cute_atomic_set(&pool->running, 0);
+	// One post per worker, so each of them leaves its semaphore wait and sees `running` == 0.
+	for (int i = 0; i < pool->thread_count; ++i) cute_semaphore_post(&pool->semaphore);
+	for (int i = 0; i < pool->thread_count; ++i) cute_thread_wait(pool->threads[i]);
+	// Every worker has been waited on, so nothing uses these primitives any more. They were previously leaked.
+	cute_mutex_destroy(&pool->task_mutex);
+	cute_mutex_destroy(&pool->sem_mutex);
+	cute_semaphore_destroy(&pool->semaphore);
+	cute_free_aligned(pool->tasks, pool->mem_ctx);
+	cute_free_aligned(pool->threads, pool->mem_ctx);
+	void* mem_ctx = pool->mem_ctx;
+	(void)mem_ctx;
+	CUTE_SYNC_FREE(pool, mem_ctx);
+}
+
+#endif // CUTE_SYNC_IMPLEMENTATION_ONCE
+#endif // CUTE_SYNC_IMPLEMENTATION
+
+/*
+ ------------------------------------------------------------------------------
+ This software is available under 2 licenses - you may choose the one you like.
+ ------------------------------------------------------------------------------
+ ALTERNATIVE A - zlib license
+ Copyright (c) 2019 Randy Gaul http://www.randygaul.net
+ This software is provided 'as-is', without any express or implied warranty.
+ In no event will the authors be held liable for any damages arising from
+ the use of this software.
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not
+ be misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+ ------------------------------------------------------------------------------
+ ALTERNATIVE B - Public Domain (www.unlicense.org)
+ This is free and unencumbered software released into the public domain.
+ Anyone is free to copy, modify, publish, use, compile, sell, or distribute this
+ software, either in source code form or as a compiled binary, for any purpose,
+ commercial or non-commercial, and by any means.
+ In jurisdictions that recognize copyright laws, the author or authors of this
+ software dedicate any and all copyright interest in the software to the public
+ domain. We make this dedication for the benefit of the public at large and to
+ the detriment of our heirs and successors. We intend this dedication to be an
+ overt act of relinquishment in perpetuity of all present and future rights to
+ this software under copyright law.
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ ------------------------------------------------------------------------------
+*/