2025-04-28 08:47:28 +08:00

366 lines
12 KiB
C

/*****************************************************************************
* win32thread.c: windows threading
*****************************************************************************
* Copyright (C) 2010-2025 x264 project
*
* Authors: Steven Walters <kemuri9@gmail.com>
* Pegasys Inc. <http://www.pegasys-inc.com>
* Henrik Gramner <henrik@gramner.com>
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02111, USA.
*
* This program is also available under a commercial proprietary license.
* For more information, contact us at licensing@x264.com.
*****************************************************************************/
/* Microsoft's way of supporting systems with >64 logical cpus can be found at
* http://www.microsoft.com/whdc/system/Sysinternals/MoreThan64proc.mspx */
/* Based on the agreed standing that x264 does not need to utilize >64 logical cpus,
* this API neither detects nor utilizes more than 64 cpus on systems that have them. */
#include "base.h"
#if HAVE_WINRT
/* _beginthreadex() is technically the correct option, but it's only available for Desktop applications.
* Using CreateThread() as an alternative works on Windows Store and Windows Phone 8.1+ as long as we're
* using a dynamically linked MSVCRT which happens to be a requirement for WinRT applications anyway */
#define _beginthreadex CreateThread
/* Redirect the critical section init and wait calls used in this file to their *Ex counterparts
* (presumably because only the *Ex forms are exposed to WinRT applications -- confirm against the SDK). */
#define InitializeCriticalSectionAndSpinCount(a, b) InitializeCriticalSectionEx(a, b, CRITICAL_SECTION_NO_DEBUG_INFO)
/* FALSE: the wait is not alertable */
#define WaitForSingleObject(a, b) WaitForSingleObjectEx(a, b, FALSE)
#else
/* desktop builds call _beginthreadex() directly; <process.h> declares it */
#include <process.h>
#endif
/* number of times to spin a thread about to block on a locked mutex before retrying and sleeping if still locked.
* Passed as the spin count argument when x264_pthread_mutex_init() creates a critical section. */
#define X264_SPIN_COUNT 0
/* global mutex for replacing MUTEX_INITIALIZER instances.
* Created by x264_win32_threading_init() and released by x264_win32_threading_destroy();
* x264_pthread_mutex_lock() holds it while lazily initializing a statically initialized mutex. */
static x264_pthread_mutex_t static_mutex;
/* Thread entry point handed to _beginthreadex, which requires a __stdcall start routine.
 * arg is the x264_pthread_t prepared by x264_pthread_create(): the user routine is run
 * with its argument and its result is stored through p_ret for x264_pthread_join().
 * The thread exit code itself is always 0. */
static unsigned __stdcall win32thread_worker( void *arg )
{
    x264_pthread_t *thread = arg;
    void *result = thread->func( thread->arg );
    *thread->p_ret = result;
    return 0;
}
/* Starts a new thread that runs start_routine( arg ).
 * thread: caller-owned descriptor that receives the routine, its argument, the result
 *         slot and the thread handle.
 * attr:   not used by this implementation.
 * Returns 0 on success and 1 when no thread handle could be obtained. */
int x264_pthread_create( x264_pthread_t *thread, const x264_pthread_attr_t *attr,
                         void *(*start_routine)( void* ), void *arg )
{
    /* The worker reads everything it needs from *thread, so fill it in before starting.
     * p_ret points at the ret field of the caller's struct: x264_pthread_join() receives
     * the descriptor by value and reaches the stored result through this pointer. */
    thread->ret   = NULL;
    thread->p_ret = &thread->ret;
    thread->arg   = arg;
    thread->func  = start_routine;

    thread->handle = (void*)_beginthreadex( NULL, 0, win32thread_worker, thread, 0, NULL );
    if( thread->handle )
        return 0;
    return 1;
}
/* Blocks until the given thread has finished.
 * value_ptr: when non-NULL, receives the value returned by the thread's start routine.
 * Returns 0 on success; returns -1 (leaving the handle open) if the wait did not
 * complete with WAIT_OBJECT_0. */
int x264_pthread_join( x264_pthread_t thread, void **value_ptr )
{
    if( WaitForSingleObject( thread.handle, INFINITE ) != WAIT_OBJECT_0 )
        return -1;

    if( value_ptr != NULL )
        *value_ptr = *thread.p_ret;
    CloseHandle( thread.handle );
    return 0;
}
/* Initializes mutex as a critical section using X264_SPIN_COUNT as its spin count.
 * attr is not used. Returns 0 on success, 1 on failure. */
int x264_pthread_mutex_init( x264_pthread_mutex_t *mutex, const x264_pthread_mutexattr_t *attr )
{
    if( InitializeCriticalSectionAndSpinCount( mutex, X264_SPIN_COUNT ) )
        return 0;
    return 1;
}
/* Releases the critical section behind mutex via DeleteCriticalSection(). Always returns 0. */
int x264_pthread_mutex_destroy( x264_pthread_mutex_t *mutex )
{
DeleteCriticalSection( mutex );
return 0;
}
/* Locks mutex.
 * A mutex whose bytes still equal X264_PTHREAD_MUTEX_INITIALIZER has never been turned
 * into a real critical section, so it is initialized here on first use.
 * Returns 0 on success, or the error from x264_pthread_mutex_init() if that lazy
 * initialization failed (the mutex is not locked in that case). */
int x264_pthread_mutex_lock( x264_pthread_mutex_t *mutex )
{
    static const x264_pthread_mutex_t pristine = X264_PTHREAD_MUTEX_INITIALIZER;

    if( memcmp( mutex, &pristine, sizeof(x264_pthread_mutex_t) ) == 0 )
    {
        int err = 0;
        /* Compare again while holding the global mutex: another thread may have
         * initialized this mutex between the first comparison and this point. */
        EnterCriticalSection( &static_mutex );
        if( memcmp( mutex, &pristine, sizeof(x264_pthread_mutex_t) ) == 0 )
            err = x264_pthread_mutex_init( mutex, NULL );
        LeaveCriticalSection( &static_mutex );

        if( err != 0 )
            return err;
    }

    EnterCriticalSection( mutex );
    return 0;
}
/* Unlocks mutex via LeaveCriticalSection(). Always returns 0. */
int x264_pthread_mutex_unlock( x264_pthread_mutex_t *mutex )
{
LeaveCriticalSection( mutex );
return 0;
}
/* Tears down the global static_mutex: deletes its critical section and then
 * clears the storage to all zero bytes. */
void x264_win32_threading_destroy( void )
{
    DeleteCriticalSection( &static_mutex );
    memset( &static_mutex, 0, sizeof(static_mutex) );
}
#if HAVE_WINRT
/* Initializes cond with InitializeConditionVariable(). attr is not used. Always returns 0. */
int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
{
InitializeConditionVariable( cond );
return 0;
}
/* No-op in this build: nothing is released for cond, the call only reports success (0). */
int x264_pthread_cond_destroy( x264_pthread_cond_t *cond )
{
return 0;
}
/* Wakes all threads waiting on cond via WakeAllConditionVariable(). Always returns 0. */
int x264_pthread_cond_broadcast( x264_pthread_cond_t *cond )
{
WakeAllConditionVariable( cond );
return 0;
}
/* Signals cond via WakeConditionVariable(). Always returns 0. */
int x264_pthread_cond_signal( x264_pthread_cond_t *cond )
{
WakeConditionVariable( cond );
return 0;
}
/* Waits on cond without a timeout (INFINITE), passing mutex as the associated critical section.
* Returns 0 when SleepConditionVariableCS() reports success and 1 when it reports failure. */
int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
{
return !SleepConditionVariableCS( cond, mutex, INFINITE );
}
/* Sets up the global static_mutex. Returns the result of x264_pthread_mutex_init()
* (0 on success, nonzero on failure). */
int x264_win32_threading_init( void )
{
return x264_pthread_mutex_init( &static_mutex, NULL );
}
/* Returns the processor count reported by GetNativeSystemInfo(). */
int x264_pthread_num_processors_np( void )
{
    SYSTEM_INFO info;
    GetNativeSystemInfo( &info );
    return (int)info.dwNumberOfProcessors;
}
#else
/* Condition variable entry points looked up at runtime by x264_win32_threading_init().
* Every member stays NULL when InitializeConditionVariable cannot be found in kernel32.dll;
* each x264_pthread_cond_* function checks its pointer and otherwise uses the
* x264_win32_cond_t emulation. */
static struct
{
/* function pointers to conditional variable API on windows 6.0+ kernels */
void (WINAPI *cond_broadcast)( x264_pthread_cond_t *cond );
void (WINAPI *cond_init)( x264_pthread_cond_t *cond );
void (WINAPI *cond_signal)( x264_pthread_cond_t *cond );
BOOL (WINAPI *cond_wait)( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex, DWORD milliseconds );
} thread_control;
/* for pre-Windows 6.0 platforms we need to define and use our own condition variable and api.
* An instance is heap-allocated by x264_pthread_cond_init() and stored in cond->Ptr. */
typedef struct
{
/* held by signal/broadcast for their whole duration and by a waiter while it registers */
x264_pthread_mutex_t mtx_broadcast;
/* protects waiter_count */
x264_pthread_mutex_t mtx_waiter_count;
/* incremented when a thread enters x264_pthread_cond_wait(), decremented once it has been woken */
volatile int waiter_count;
/* waiters block on this; signal releases it by 1, broadcast by waiter_count */
HANDLE semaphore;
/* auto-reset event set by a woken waiter; signal/broadcast wait on it before returning */
HANDLE waiters_done;
/* nonzero while a broadcast is in progress, so that only the last woken waiter sets waiters_done */
volatile int is_broadcast;
} x264_win32_cond_t;
/* Initializes the condition variable cond. attr is not used.
 *
 * When the native condition variable API was found by x264_win32_threading_init(),
 * the call is forwarded to it and always succeeds. Otherwise an x264_win32_cond_t
 * emulation object is allocated and stored in cond->Ptr.
 *
 * Returns 0 on success and -1 on failure. On failure everything acquired so far is
 * released again and cond->Ptr is set to NULL, i.e. cond is left uninitialized. */
int x264_pthread_cond_init( x264_pthread_cond_t *cond, const x264_pthread_condattr_t *attr )
{
    if( thread_control.cond_init )
    {
        thread_control.cond_init( cond );
        return 0;
    }

    /* non native condition variables */
    cond->Ptr = NULL;
    x264_win32_cond_t *win32_cond = calloc( 1, sizeof(*win32_cond) );
    if( !win32_cond )
        return -1;

    /* initial count 0: nobody is released until a signal/broadcast arrives */
    win32_cond->semaphore = CreateSemaphoreW( NULL, 0, 0x7fffffff, NULL );
    if( !win32_cond->semaphore )
        goto fail_alloc;

    if( x264_pthread_mutex_init( &win32_cond->mtx_waiter_count, NULL ) )
        goto fail_semaphore;
    if( x264_pthread_mutex_init( &win32_cond->mtx_broadcast, NULL ) )
        goto fail_mtx_waiter_count;

    /* auto-reset event, initially non-signaled */
    win32_cond->waiters_done = CreateEventW( NULL, FALSE, FALSE, NULL );
    if( !win32_cond->waiters_done )
        goto fail_mtx_broadcast;

    /* publish the object only once it is fully constructed */
    cond->Ptr = win32_cond;
    return 0;

    /* unwind in reverse order of acquisition */
fail_mtx_broadcast:
    x264_pthread_mutex_destroy( &win32_cond->mtx_broadcast );
fail_mtx_waiter_count:
    x264_pthread_mutex_destroy( &win32_cond->mtx_waiter_count );
fail_semaphore:
    CloseHandle( win32_cond->semaphore );
fail_alloc:
    free( win32_cond );
    return -1;
}
/* Destroys the condition variable cond. Always returns 0.
 *
 * Native condition variables need no cleanup. For the emulated variant the semaphore,
 * the event, both mutexes and the x264_win32_cond_t object itself are released and
 * cond->Ptr is reset to NULL. A NULL cond->Ptr means there is nothing allocated and is
 * treated as already destroyed, which also makes a repeated destroy harmless. */
int x264_pthread_cond_destroy( x264_pthread_cond_t *cond )
{
    /* native condition variables do not destroy */
    if( thread_control.cond_init )
        return 0;

    /* non native condition variables */
    x264_win32_cond_t *win32_cond = cond->Ptr;
    if( !win32_cond )
        return 0;

    CloseHandle( win32_cond->semaphore );
    CloseHandle( win32_cond->waiters_done );
    x264_pthread_mutex_destroy( &win32_cond->mtx_broadcast );
    x264_pthread_mutex_destroy( &win32_cond->mtx_waiter_count );
    free( win32_cond );
    /* do not leave a dangling pointer behind */
    cond->Ptr = NULL;
    return 0;
}
/* Wakes every thread currently waiting on cond.
* Uses the native API when it was resolved (always returns 0 in that case); otherwise
* runs the x264_win32_cond_t emulation and returns the result of unlocking mtx_broadcast. */
int x264_pthread_cond_broadcast( x264_pthread_cond_t *cond )
{
if( thread_control.cond_broadcast )
{
thread_control.cond_broadcast( cond );
return 0;
}
/* non native condition variables */
x264_win32_cond_t *win32_cond = cond->Ptr;
/* mtx_broadcast is held until the end: x264_pthread_cond_wait() also takes it before
* registering, so no new waiter can join while this broadcast is in progress */
x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
int have_waiter = 0;
if( win32_cond->waiter_count )
{
/* tells woken waiters that only the last one of them should set waiters_done */
win32_cond->is_broadcast = 1;
have_waiter = 1;
}
if( have_waiter )
{
/* release one semaphore count per registered waiter */
ReleaseSemaphore( win32_cond->semaphore, win32_cond->waiter_count, NULL );
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
/* block until the last woken waiter sets waiters_done */
WaitForSingleObject( win32_cond->waiters_done, INFINITE );
win32_cond->is_broadcast = 0;
}
else
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
return x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
}
/* Wakes one thread waiting on cond, if there is any.
* Uses the native API when it was resolved (always returns 0 in that case); otherwise
* runs the x264_win32_cond_t emulation and returns the result of unlocking mtx_broadcast. */
int x264_pthread_cond_signal( x264_pthread_cond_t *cond )
{
if( thread_control.cond_signal )
{
thread_control.cond_signal( cond );
return 0;
}
/* non-native condition variables */
x264_win32_cond_t *win32_cond = cond->Ptr;
/* mtx_broadcast stays locked until the end, keeping new waiters and other
* signal/broadcast calls out while this signal is being delivered */
x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
/* read the waiter count under its own mutex */
x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
int have_waiter = win32_cond->waiter_count;
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
if( have_waiter )
{
/* let exactly one waiter through, then wait for it to set waiters_done */
ReleaseSemaphore( win32_cond->semaphore, 1, NULL );
WaitForSingleObject( win32_cond->waiters_done, INFINITE );
}
return x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
}
/* Releases mutex, waits for cond to be signaled or broadcast, then re-acquires mutex.
* Native path: returns 0 when the native wait reports success and 1 otherwise.
* Emulated path: returns the result of re-locking mutex. */
int x264_pthread_cond_wait( x264_pthread_cond_t *cond, x264_pthread_mutex_t *mutex )
{
if( thread_control.cond_wait )
return !thread_control.cond_wait( cond, mutex, INFINITE );
/* non native condition variables */
x264_win32_cond_t *win32_cond = cond->Ptr;
/* register as a waiter; taking mtx_broadcast first makes this wait for any
* signal/broadcast that is currently in progress */
x264_pthread_mutex_lock( &win32_cond->mtx_broadcast );
x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
win32_cond->waiter_count++;
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
x264_pthread_mutex_unlock( &win32_cond->mtx_broadcast );
// unlock the external mutex
x264_pthread_mutex_unlock( mutex );
/* sleep until signal/broadcast releases the semaphore */
WaitForSingleObject( win32_cond->semaphore, INFINITE );
x264_pthread_mutex_lock( &win32_cond->mtx_waiter_count );
win32_cond->waiter_count--;
/* outside a broadcast the woken waiter always acknowledges; during a broadcast
* only the waiter that brings the count down to zero does */
int last_waiter = !win32_cond->waiter_count || !win32_cond->is_broadcast;
x264_pthread_mutex_unlock( &win32_cond->mtx_waiter_count );
if( last_waiter )
SetEvent( win32_cond->waiters_done );
// lock the external mutex
return x264_pthread_mutex_lock( mutex );
}
/* One-time setup of the threading layer.
 * Looks up the native condition variable functions in kernel32.dll and stores them in
 * thread_control; the remaining three are only looked up when InitializeConditionVariable
 * is present. Finally creates the global static_mutex.
 * Returns the result of x264_pthread_mutex_init() (0 on success, nonzero on failure). */
int x264_win32_threading_init( void )
{
    HANDLE kernel32 = GetModuleHandleW( L"kernel32.dll" );

    /* InitializeConditionVariable acts as the probe for the whole API family */
    thread_control.cond_init = (void*)GetProcAddress( kernel32, "InitializeConditionVariable" );
    if( thread_control.cond_init )
    {
        /* windows 6.0+ kernel: fetch the rest of the condition variable functions */
        thread_control.cond_wait      = (void*)GetProcAddress( kernel32, "SleepConditionVariableCS" );
        thread_control.cond_signal    = (void*)GetProcAddress( kernel32, "WakeConditionVariable" );
        thread_control.cond_broadcast = (void*)GetProcAddress( kernel32, "WakeAllConditionVariable" );
    }

    return x264_pthread_mutex_init( &static_mutex, NULL );
}
/* Returns the number of logical cpus in the affinity mask that applies to the caller,
 * and never less than 1.
 *
 * GetProcessAffinityMask returns affinities of 0 when the process has threads in multiple
 * processor groups, so on x86_64 the current thread's group affinity is tried first
 * (when GetThreadGroupAffinity exists) and the process mask is only the fallback. */
int x264_pthread_num_processors_np( void )
{
    DWORD_PTR process_mask = 0, system_mask;

#if ARCH_X86_64
    /* GetThreadGroupAffinity is resolved at runtime since it may not exist */
    HANDLE kernel32 = GetModuleHandleW( L"kernel32.dll" );
    BOOL (*get_thread_affinity)( HANDLE thread, void *group_affinity ) = (void*)GetProcAddress( kernel32, "GetThreadGroupAffinity" );
    if( get_thread_affinity )
    {
        /* platform supports >64 logical cpus; local stand-in for GROUP_AFFINITY */
        struct
        {
            ULONG_PTR mask; // KAFFINITY = ULONG_PTR
            USHORT group;
            USHORT reserved[3];
        } thread_affinity;

        if( get_thread_affinity( GetCurrentThread(), &thread_affinity ) )
            process_mask = thread_affinity.mask;
    }
#endif

    if( process_mask == 0 )
        GetProcessAffinityMask( GetCurrentProcess(), &process_mask, &system_mask );

    /* count the set bits of the mask */
    int count = 0;
    for( DWORD_PTR remaining = process_mask; remaining; remaining >>= 1 )
        count += (int)(remaining & 1);

    return count > 0 ? count : 1;
}
#endif