diff --git a/kernel.cu b/kernel.cu
index 471656e..59f17ec 100644
--- a/kernel.cu
+++ b/kernel.cu
@@ -4,7 +4,7 @@
 #include <chrono>
 #include <ctime>
 #include <stdio.h>
-
+#include <pthread.h>
 #include <stdlib.h>
 #include "UCPClient.h"
 
@@ -29,15 +29,47 @@
 // #pragma comment(lib, "nvapi.lib")
 // #pragma comment(lib, "nvapi64.lib")
 
+void promptExit(int exitCode);
+pthread_mutex_t stratum_sock_lock;
+pthread_mutex_t stratum_log_lock;
+
 #ifdef __INTELLISENSE__
 #define __launch_bounds__(blocksize)
 #endif
 //#define ROTR64(x, n)  (((x) >> (n)) | ((x) << (64 - (n))))
 #define ROTR(x,n) ROTR64(x,n)
+#define MAX_GPUS 16
+#define CUDA_SAFE_CALL(call)                                          \
+do {                                                                  \
+	cudaError_t err = call;                                           \
+	if (cudaSuccess != err) {                                         \
+		fprintf(stderr, "Cuda error in func '%s' at line %i : %s.\n", \
+		         __FUNCTION__, __LINE__, cudaGetErrorString(err) );   \
+		promptExit(-1);                                           \
+	}                                                                 \
+} while (0)
+
 
 
 __constant__ static uint64_t __align__(8) c_512[16];
 __constant__ static uint64_t __align__(8) c_vblake[8];
+__constant__ uint64_t headerIn[8];
+static uint32_t *d_nonces[MAX_GPUS];
+static uint64_t *dev_nonceStart[MAX_GPUS];
+
+
+__host__
+void veri_init(int thr_id)
+{
+	CUDA_SAFE_CALL(cudaMalloc(&d_nonces[thr_id], 1 * sizeof(uint32_t)));
+	CUDA_SAFE_CALL(cudaMalloc(&dev_nonceStart[thr_id], 1 * sizeof(uint64_t)));
+};
+void veri_setBlock(void *blockf)
+{
+	
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(headerIn, blockf, 8 * sizeof(uint64_t), 0, cudaMemcpyHostToDevice));
+};
+
 __device__ __forceinline__ uint64_t ROTR64_L(uint64_t value,
 	const int offset) {
 	uint2 result;
@@ -78,13 +110,9 @@ __device__ __forceinline__ uint64_t ROTR64_H(uint64_t value,
 	v[d] ^= (~v[a] & ~v[b] & ~v[c]) | (~v[a] & v[b] & v[c]) | (v[a] & ~v[b] & v[c])   | (v[a] & v[b] & ~v[c]); \
     v[d] ^= (~v[a] & ~v[b] & v[c]) | (~v[a] & v[b] & ~v[c]) | (v[a] & ~v[b] & ~v[c]) | (v[a] & v[b] & v[c]); \
 }
-cudaStream_t cudastream;
 
-uint32_t *blockHeadermobj = nullptr;
-uint32_t *midStatemobj = nullptr;
-uint32_t *nonceOutmobj = nullptr;
 
-cudaError_t grindNonces(uint32_t *nonceResult, uint64_t *hashStart, const uint64_t *header);
+void grindNonces(uint32_t startNonce, uint32_t *nonceResult, uint64_t *hashStart, const uint64_t *header, int dev_id);
 __device__ __constant__
 static const uint8_t c_sigma_big[16][16] = {
 	{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 },
@@ -205,8 +233,8 @@ uint64_t vBlake2(const uint64_t h0, const uint64_t h1, const uint64_t h2, const
 #endif
 
 #if HIGH_RESOURCE
-#define DEFAULT_BLOCKSIZE 0xd0000
-#define DEFAULT_THREADS_PER_BLOCK 1024
+#define DEFAULT_BLOCKSIZE 0x80000
+#define DEFAULT_THREADS_PER_BLOCK 256
 #else
 #define DEFAULT_BLOCKSIZE 512
 #define DEFAULT_THREADS_PER_BLOCK 512
@@ -214,23 +242,39 @@ uint64_t vBlake2(const uint64_t h0, const uint64_t h1, const uint64_t h2, const
 
 int blocksize = DEFAULT_BLOCKSIZE;
 int threadsPerBlock = DEFAULT_THREADS_PER_BLOCK;
+int opt_n_threads = 0;
+short device_map[MAX_GPUS] = { 0 };
+int gpu_threads = 1;
+int active_gpus;
+char * device_name[MAX_GPUS];
+long  device_sm[MAX_GPUS] = { 0 };
+short device_mpcount[MAX_GPUS] = { 0 };
+int init[MAX_GPUS] = { 0 };
 
 bool verboseOutput = false;
+struct mining_attr {
+	int dev_id;
+	string host;
+	int port;
+	string username;
+	string password;
+
+};
 
 /*
 * Kernel function to search a range of nonces for a solution falling under the macro-configured difficulty (CPU=2^24, GPU=2^32).
 */
-//__launch_bounds__(256, 2)
-__global__ void vblakeHasher(uint32_t *nonceStart, uint32_t *nonceOut, uint64_t *hashStartOut, uint64_t const *headerIn)
+__global__ void vblakeHasher(uint32_t startnonce, uint32_t *nonceOut, uint64_t *hashStartOut)
 {
 	// Generate a unique starting nonce for each thread that doesn't overlap with the work of any other thread
-	const uint32_t workStart = ((blockDim.x * blockIdx.x + threadIdx.x)) + nonceStart[0];
+	uint32_t nonce = (blockDim.x * blockIdx.x + threadIdx.x) + startnonce;
 	__shared__ uint64_t s_u512[16],s_vblake[8];
 
 	if (threadIdx.x < 16U) s_u512[threadIdx.x] = c_512[threadIdx.x];
 	if (threadIdx.x < 8U) s_vblake[threadIdx.x] = c_vblake[threadIdx.x];
+
 	uint64_t nonceHeaderSection = headerIn[7];
-	unsigned int nonce = workStart;
+	
 	//for (unsigned int nonce = workStart; nonce < workStart + WORK_PER_THREAD; nonce++) {
 		// Zero out nonce position and write new nonce to last 32 bits of prototype header
 		nonceHeaderSection &= 0x00000000FFFFFFFFu;
@@ -250,7 +294,28 @@ __global__ void vblakeHasher(uint32_t *nonceStart, uint32_t *nonceOut, uint64_t
 		}
 	//}
 }
+int cuda_num_devices()
+{
+	int version = 0, GPU_N = 0;
+	cudaError_t err = cudaDriverGetVersion(&version);
+	if (err != cudaSuccess) {
+		printf("Unable to query CUDA driver version! Is an nVidia driver installed?\n");
+		exit(1);
+	}
+
+	if (version < CUDART_VERSION) {
+		printf("Your system does not support CUDA %d.%d API!\n",
+			CUDART_VERSION / 1000, (CUDART_VERSION % 1000) / 10);
+		exit(1);
+	}
 
+	err = cudaGetDeviceCount(&GPU_N);
+	if (err != cudaSuccess) {
+		printf("Unable to query number of CUDA devices! Is an nVidia driver installed?\n");
+		exit(1);
+	}
+	return GPU_N;
+}
 void promptExit(int exitCode)
 {
 	cout << "Exiting in 10 seconds..." << endl;
@@ -272,12 +337,12 @@ void embedTimestampInHeader(uint8_t *header, uint32_t timestamp)
 /**
 * Returns a 64-byte header to attempt to mine with.
 */
-uint64_t* getWork(UCPClient& ucpClient, uint32_t timestamp)
+void getWork(UCPClient& ucpClient, uint32_t timestamp, uint64_t *header)
 {
-	uint64_t *header = new uint64_t[8];
+	//uint64_t *header = new uint64_t[8];
 	ucpClient.copyHeaderToHash((byte *)header);
 	embedTimestampInHeader((uint8_t*)header, timestamp);
-	return header;
+	//return header;
 }
 
 int deviceToUse = 0;
@@ -330,7 +395,36 @@ void vprintf(char* toprint) {
 		printf(toprint);
 	}
 }
+void cuda_devicenames()
+{
+	cudaError_t err;
+	int GPU_N;
+	err = cudaGetDeviceCount(&GPU_N);
+	if (err != cudaSuccess)
+	{
+		printf("Unable to query number of CUDA devices! Is an nVidia driver installed?");
+		exit(1);
+	}
 
+	if (opt_n_threads)
+		GPU_N = min(MAX_GPUS, opt_n_threads);
+	for (int i = 0; i < GPU_N; i++)
+	{
+			int dev_id = device_map[i];
+		cudaDeviceProp props;
+		cudaGetDeviceProperties(&props, dev_id);
+
+		device_sm[dev_id] = (props.major * 100 + props.minor * 10);
+		device_mpcount[dev_id] = (short)props.multiProcessorCount;
+
+		if (device_name[dev_id]) {
+			free(device_name[dev_id]);
+			device_name[dev_id] = NULL;
+		}
+
+			device_name[dev_id] = strdup(props.name);
+	}
+}
 void printHelpAndExit() {
 	printf("VeriBlock vBlake GPU CUDA Miner v1.0\n");
 	printf("Required Arguments:\n");
@@ -368,6 +462,38 @@ void net_deinit(void)
 	WSACleanup();
 #endif
 }
+static bool substringsearch(const char *haystack, const char *needle, int &match)
+{
+	int hlen = (int)strlen(haystack);
+	int nlen = (int)strlen(needle);
+	for (int i = 0; i < hlen; ++i)
+	{
+		if (haystack[i] == ' ') continue;
+		int j = 0, x = 0;
+		while (j < nlen)
+		{
+			if (haystack[i + x] == ' ') { ++x; continue; }
+			if (needle[j] == ' ') { ++j; continue; }
+			if (needle[j] == '#') return ++match == needle[j + 1] - '0';
+			if (tolower(haystack[i + x]) != tolower(needle[j])) break;
+			++j; ++x;
+		}
+		if (j == nlen) return true;
+	}
+	return false;
+}
+int cuda_finddevice(char *name)
+{
+	int num = cuda_num_devices();
+	int match = 0;
+	for (int i = 0; i < num; ++i)
+	{
+		cudaDeviceProp props;
+		if (cudaGetDeviceProperties(&props, i) == cudaSuccess)
+			if (substringsearch(props.name, name, match)) return i;
+	}
+	return -1;
+}
 
 string net_dns_resolve(const char* hostname)
 {
@@ -420,6 +546,142 @@ string net_dns_resolve(const char* hostname)
 }
 
 char outputBuffer[8192];
+
+
+void* miner_thread(void* arg){
+	// Run initialization of device before beginning timer
+	struct mining_attr *arg_Struct =
+		(struct mining_attr*) arg;
+
+	pthread_mutex_lock(&stratum_sock_lock);
+	UCPClient ucpClient(arg_Struct->host, arg_Struct->port, arg_Struct->username, arg_Struct->password);
+
+	byte target[24];
+	ucpClient.copyMiningTarget(target);
+	uint64_t header[8];
+	
+	getWork(ucpClient, (uint32_t)std::time(0),header);
+	pthread_mutex_unlock(&stratum_sock_lock);
+
+	pthread_mutex_lock(&stratum_log_lock);
+	unsigned long long startTime = std::time(0);
+	pthread_mutex_unlock(&stratum_log_lock);
+	//mutex unlock
+	
+	uint32_t nonceResult[1] = { 0 };
+	uint64_t hashStart[1] = { 0 };
+	uint32_t startNonce = 0;
+	unsigned long long hashes = 0;
+	uint32_t count = 0;
+	int numLines = 0;
+
+	// Mining loop
+	while (true) {
+		vprintf("top of mining loop\n");
+		count++;
+		long timestamp = (long)std::time(0);
+		//delete[] header;
+		vprintf("Getting work...\n");
+
+		pthread_mutex_lock(&stratum_sock_lock);
+		getWork(ucpClient, timestamp, header);
+		vprintf("Getting job id...\n");
+		int jobId = ucpClient.getJobId();
+		pthread_mutex_unlock(&stratum_sock_lock);
+
+		count++;
+		vprintf("Running kernel...\n");
+		grindNonces(startNonce, nonceResult, hashStart, header, arg_Struct->dev_id);
+		
+		vprintf("Kernel finished...\n");
+		
+		//mutex lock
+		pthread_mutex_lock(&stratum_log_lock);
+		unsigned long long totalTime = std::time(0) - startTime;
+		pthread_mutex_unlock(&stratum_log_lock);
+		//todo mutex unlock
+		hashes += (blocksize * threadsPerBlock * WORK_PER_THREAD);
+		if ((uint64_t)startNonce +  (uint64_t)(blocksize * threadsPerBlock * WORK_PER_THREAD) < (uint64_t)0xffffffff) {
+			startNonce += (blocksize * threadsPerBlock * WORK_PER_THREAD);
+		}
+		else
+			startNonce = 0;
+
+		double hashSpeed = (double)hashes;
+		hashSpeed /= (totalTime * 1024 * 1024);
+
+		if (count % 10 == 0) {
+			//mutex lock
+			pthread_mutex_lock(&stratum_sock_lock);
+
+			int validShares = ucpClient.getValidShares();
+			int invalidShares = ucpClient.getInvalidShares();
+			int totalAccountedForShares = invalidShares + validShares;
+			int totalSubmittedShares = ucpClient.getSentShares();
+			int unaccountedForShares = totalSubmittedShares - totalAccountedForShares;
+			pthread_mutex_unlock(&stratum_sock_lock);
+			//mutex unlock
+			double percentage = ((double)validShares) / totalAccountedForShares;
+			percentage *= 100;
+			// printf("[GPU #%d (%s)] : %f MH/second    valid shares: %d/%d/%d (%.3f%%)\n", deviceToUse, selectedDeviceName.c_str(), hashSpeed, validShares, totalAccountedForShares, totalSubmittedShares, percentage);
+
+			printf("[GPU: %d %s] : %0.2f MH/s shares: %d/%d/%d (%.3f%%)\n", arg_Struct->dev_id, device_name[arg_Struct->dev_id], hashSpeed, validShares, totalAccountedForShares, totalSubmittedShares, percentage);
+		}
+
+		if (nonceResult[0] != 0x01000000 && nonceResult[0] != 0) {
+			uint32_t nonce = *nonceResult;
+			nonce = (((nonce & 0xFF000000) >> 24) | ((nonce & 0x00FF0000) >> 8) | ((nonce & 0x0000FF00) << 8) | ((nonce & 0x000000FF) << 24));
+			
+			pthread_mutex_lock(&stratum_sock_lock);
+			ucpClient.submitWork(jobId, timestamp, nonce);
+			pthread_mutex_unlock(&stratum_sock_lock);
+			
+			nonceResult[0] = 0;
+
+			char line[100];
+
+			// Hash coming from GPU is reversed
+			uint64_t hashFlipped = 0;
+			hashFlipped |= (hashStart[0] & 0x00000000000000FF) << 56;
+			hashFlipped |= (hashStart[0] & 0x000000000000FF00) << 40;
+			hashFlipped |= (hashStart[0] & 0x0000000000FF0000) << 24;
+			hashFlipped |= (hashStart[0] & 0x00000000FF000000) << 8;
+			hashFlipped |= (hashStart[0] & 0x000000FF00000000) >> 8;
+			hashFlipped |= (hashStart[0] & 0x0000FF0000000000) >> 24;
+			hashFlipped |= (hashStart[0] & 0x00FF000000000000) >> 40;
+			hashFlipped |= (hashStart[0] & 0xFF00000000000000) >> 56;
+
+#if CPU_SHARES 
+			sprintf(line, "\t Share Found @ 2^24! {%#018llx} [nonce: %#08lx]", hashFlipped, nonce);
+#else
+			sprintf(line, "\t Share Found @ 2^32! {%#018llx} [nonce: %#08lx]", hashFlipped, nonce);
+#endif
+
+			cout << line << endl;
+			vprintf("Logging\n");
+			Log::info(line);
+			vprintf("Done logging\n");
+			vprintf("Made line\n");
+
+			numLines++;
+
+			// Uncomment these lines to get access to this data for display purposes
+			/*
+			long long extraNonce = ucpClient.getStartExtraNonce();
+			int jobId = ucpClient.getJobId();
+			int encodedDifficulty = ucpClient.getEncodedDifficulty();
+			string previousBlockHashHex = ucpClient.getPreviousBlockHash();
+			string merkleRoot = ucpClient.getMerkleRoot();
+			*/
+
+		}
+		vprintf("About to restart loop...\n");
+	}
+
+	printf("Resetting device...\n");
+	CUDA_SAFE_CALL(cudaDeviceReset());
+	
+}
 int main(int argc, char *argv[])
 {
 	// Check for help argument (only -h)
@@ -450,13 +712,48 @@ int main(int argc, char *argv[])
 			printf("%s\n", argument);
 			if (argument[0] == '-' && argument[1] == 'd')
 			{
-				if (strlen(argv[i + 1]) == 2) {
-					// device num >= 10
-					deviceToUse = (argv[i + 1][0] - 48) * 10 + (argv[i + 1][1] - 48);
+
+				int device_thr[MAX_GPUS] = { 0 };
+				int ngpus = cuda_num_devices();
+				char* pch = strtok(argv[i + 1], ",");
+				opt_n_threads = 0;
+				while (pch != NULL && opt_n_threads < MAX_GPUS) {
+					if (pch[0] >= '0' && pch[0] <= '9' && strlen(pch) <= 2)
+					{
+						if (atoi(pch) < ngpus)
+							device_map[opt_n_threads++] = atoi(pch);
+						else {
+							printf("Non-existant CUDA device #%d specified in -d option\n\n", atoi(pch));
+							printHelpAndExit();
+						}
+					}
+					else {
+						int device = cuda_finddevice(pch);
+						if (device >= 0 && device < ngpus)
+							device_map[opt_n_threads++] = device;
+						else {
+							printf("Non-existant CUDA device '%s' specified in -d option\n\n", pch);
+							printHelpAndExit();
+						}
+					}
+					pch = strtok(NULL, ",");
+				}
+				// count threads per gpu
+				for (int n = 0; n < opt_n_threads; n++) {
+					int device = device_map[n];
+					device_thr[device]++;
 				}
-				else {
-					deviceToUse = argv[i + 1][0] - 48;
+				for (int n = 0; n < ngpus; n++) {
+					gpu_threads = max(gpu_threads, device_thr[n]);
 				}
+
+				//  if (strlen(argv[i + 1]) == 2) {
+				//  device num >= 10
+				//	deviceToUse = (argv[i + 1][0] - 48) * 10 + (argv[i + 1][1] - 48);
+				//  }
+				//  else {
+				//	deviceToUse = argv[i + 1][0] - 48;
+				//  }
 			}
 			else if (!strcmp(argument, "-o"))
 			{
@@ -516,6 +813,10 @@ int main(int argc, char *argv[])
 		printHelpAndExit();
 	}
 
+	pthread_mutex_init(&stratum_sock_lock, NULL);
+	pthread_mutex_init(&stratum_log_lock, NULL);
+
+
 	if (HIGH_RESOURCE) {
 		sprintf(outputBuffer, "Resource Utilization: HIGH");
 		cerr << outputBuffer << endl;
@@ -527,16 +828,6 @@ int main(int argc, char *argv[])
 		Log::info(outputBuffer);
 	}
 
-	if (NVML) {
-		sprintf(outputBuffer, "NVML Status: ENABLED");
-		cerr << outputBuffer << endl;
-		Log::info(outputBuffer);
-	}
-	else {
-		sprintf(outputBuffer, "NVML Status: DISABLED");
-		cerr << outputBuffer << endl;
-		Log::info(outputBuffer);
-	}
 
 	if (CPU_SHARES) {
 		sprintf(outputBuffer, "Share Type: CPU");
@@ -560,8 +851,7 @@ int main(int argc, char *argv[])
 		Log::info(outputBuffer);
 	}
 
-	// No effect if NVML is not enabled
-	readyNVML(deviceToUse);
+
 
 #ifdef _WIN32
 	HANDLE consoleHandle = GetStdHandle(STD_OUTPUT_HANDLE);
@@ -603,426 +893,81 @@ int main(int argc, char *argv[])
 	sprintf(outputBuffer, "Attempting to mine to pool %s:%d with username %s and password %s...", host.c_str(), port, username.c_str(), password.c_str());
 	cout << outputBuffer << endl;
 	Log::info(outputBuffer);
-	UCPClient ucpClient(host, port, username, password);
 
-	byte target[24];
-	ucpClient.copyMiningTarget(target);
 
-	sprintf(outputBuffer, "Using Device: %d\n\n", deviceToUse);
-	cout << outputBuffer << endl;
-	Log::info(outputBuffer);
 
-	int version, ret;
-	ret = cudaDriverGetVersion(&version);
-	if (ret != cudaSuccess)
-	{
-		sprintf(outputBuffer, "Error when getting CUDA driver version: %d", ret);
-		cout << outputBuffer << endl;
-		Log::error(outputBuffer);
-		promptExit(-1);
-	}
 
-	int runtimeVersion;
-	ret = cudaRuntimeGetVersion(&runtimeVersion);
-	if (ret != cudaSuccess)
-	{
-		sprintf(outputBuffer, "Error when getting CUDA runtime version: %d", ret);
-		cout << outputBuffer << endl;
-		Log::error(outputBuffer);
-		promptExit(-1);
+	active_gpus = cuda_num_devices();
+	if (active_gpus == 0) {
+		printf("No CUDA devices found! terminating.\n");
+		exit(1);
 	}
-
-
-	int deviceCount;
-	ret = cudaGetDeviceCount(&deviceCount);
-	if (ret != cudaSuccess)
-	{
-		sprintf(outputBuffer, "Error when getting CUDA device count: %d", ret);
-		cout << outputBuffer << endl;
-		Log::error(outputBuffer);
-		promptExit(-1);
+	for (int i = 0; i < MAX_GPUS; i++) {
+		device_map[i] = i % active_gpus;
+		device_name[i] = NULL;
 	}
+	cuda_devicenames();
 
-	cudaDeviceProp deviceProp;
-
-#if NVML
-	char driver[NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE];
-	nvmlSystemGetDriverVersion(driver, NVML_SYSTEM_DRIVER_VERSION_BUFFER_SIZE);
-#else
-	char driver[] = "???.?? (NVML NOT ENABLED)";
-#endif
 
-	sprintf(outputBuffer, "CUDA Version: %.1f", ((float)version / 1000));
-	cout << outputBuffer << endl;
-	Log::info(outputBuffer);
-	sprintf(outputBuffer, "CUDA Runtime Version: %d", runtimeVersion);
-	cout << outputBuffer << endl;
-	Log::info(outputBuffer);
-	sprintf(outputBuffer, "NVidia Driver Version: %s", driver);
-	cout << outputBuffer << endl;
-	Log::info(outputBuffer);
-	sprintf(outputBuffer, "CUDA Devices: %d", deviceCount);
-	cout << outputBuffer << endl << endl;
-	Log::info(outputBuffer);
-
-	string selectedDeviceName;
-	// Print out information about all available CUDA devices on system
-	for (int count = 0; count < deviceCount; count++)
-	{
-		ret = cudaGetDeviceProperties(&deviceProp, count);
-		if (ret != cudaSuccess)
-		{
-			sprintf(outputBuffer, "An error occurred while getting the CUDA device properties: %d", ret);
-			cerr << outputBuffer << endl;
-			Log::error(outputBuffer);
-		}
-
-		if (count == deviceToUse) {
-			selectedDeviceName = deviceProp.name;
-		}
-
-		sprintf(outputBuffer, "Device #%d (%s):", count, deviceProp.name);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Clock Rate:              %d MHz", (deviceProp.clockRate / 1024));
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Is Integrated:           %s", (deviceProp.integrated == 0 ? "false" : "true"));
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Compute Capability:      %d.%d", deviceProp.major, deviceProp.minor);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Kernel Concurrency:      %d", deviceProp.concurrentKernels);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Max Grid Size:           %d x %d x %d", deviceProp.maxGridSize[0], deviceProp.maxGridSize[1], deviceProp.maxGridSize[2]);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Max Threads per Block:   %d", deviceProp.maxThreadsPerBlock);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Registers per Block:     %d", deviceProp.regsPerBlock);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Registers per SM:        %d", deviceProp.regsPerMultiprocessor);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Processor Count:         %d", deviceProp.multiProcessorCount);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Shared Memory/Block:     %zd", deviceProp.sharedMemPerBlock);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Shared Memory/Proc:      %zd", deviceProp.sharedMemPerMultiprocessor);
-		cout << outputBuffer << endl;
-		Log::info(outputBuffer);
-		sprintf(outputBuffer, "    Warp Size:               %d", deviceProp.warpSize);
+/*	for (int i = 0; i < opt_n_threads; i++) {
+		cudaSetDevice(device_map[i]);
+		cudaDeviceReset();
+		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+		cudaFuncSetCacheConfig(vblakeHasher, cudaFuncCachePreferL1);
+		cudaError_t e = cudaGetLastError();
+		sprintf(outputBuffer, "Last error: %s\n", cudaGetErrorString(e));
 		cout << outputBuffer << endl;
 		Log::info(outputBuffer);
-	}
 
-	sprintf(outputBuffer, "Mining on device #%d...", deviceToUse);
-	cout << outputBuffer << endl;
-	Log::info(outputBuffer);
-
-	ret = cudaSetDevice(deviceToUse);
-	if (ret != cudaSuccess)
-	{
-		sprintf(outputBuffer, "CUDA encountered an error while setting the device to %d:%d", deviceToUse, ret);
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
 	}
+*/
 
-	cudaDeviceReset();
-
-	// Don't have GPU busy-wait on GPU
-	ret = cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
-
-	cudaError_t e = cudaGetLastError();
-	sprintf(outputBuffer, "Last error: %s\n", cudaGetErrorString(e));
-	cout << outputBuffer << endl;
-	Log::info(outputBuffer);
-
-	// Run initialization of device before beginning timer
-	uint64_t* header = getWork(ucpClient, (uint32_t)std::time(0));
-
-	unsigned long long startTime = std::time(0);
-	uint32_t nonceResult[1] = { 0 };
-	uint64_t hashStart[1] = { 0 };
-
-	unsigned long long hashes = 0;
-	cudaError_t cudaStatus;
-
-	uint32_t count = 0;
-
-	int numLines = 0;
-
-	// Mining loop
-	while (true) {
-		vprintf("top of mining loop\n");
-		count++;
-		long timestamp = (long)std::time(0);
-		delete[] header;
-		vprintf("Getting work...\n");
-		header = getWork(ucpClient, timestamp);
-		vprintf("Getting job id...\n");
-		int jobId = ucpClient.getJobId();
-		count++;
-		vprintf("Running kernel...\n");
-		cudaStatus = grindNonces(nonceResult, hashStart, header);
-		vprintf("Kernel finished...\n");
-		if (cudaStatus != cudaSuccess) {
-			cudaError_t e = cudaGetLastError();
-			sprintf(outputBuffer, "Error from running grindNonces: %s\nThis often occurs when a GPU overheats, has an unstable overclock, or has too aggressive launch parameters\nfor the vBlake kernel.\nYou can try using less aggressive settings, like:\n-tpb 256 -bs 256\nAnd try increasing these numbers until you hit instability issues again.", cudaGetErrorString(e));
-			cerr << outputBuffer << endl;
-			Log::error(outputBuffer);
-			promptExit(-1);
-		}
-
-		unsigned long long totalTime = std::time(0) - startTime;
-		hashes += (blocksize * threadsPerBlock * WORK_PER_THREAD);
-
-		double hashSpeed = (double)hashes;
-		hashSpeed /= (totalTime * 1024 * 1024);
-
-		if (count % 10 == 0) {
-			int validShares = ucpClient.getValidShares();
-			int invalidShares = ucpClient.getInvalidShares();
-			int totalAccountedForShares = invalidShares + validShares;
-			int totalSubmittedShares = ucpClient.getSentShares();
-			int unaccountedForShares = totalSubmittedShares - totalAccountedForShares;
-
-			double percentage = ((double)validShares) / totalAccountedForShares;
-			percentage *= 100;
-			// printf("[GPU #%d (%s)] : %f MH/second    valid shares: %d/%d/%d (%.3f%%)\n", deviceToUse, selectedDeviceName.c_str(), hashSpeed, validShares, totalAccountedForShares, totalSubmittedShares, percentage);
-
-			printf("[GPU #%d (%s)] : %0.2f MH/s shares: %d/%d/%d (%.3f%%)\n", deviceToUse, selectedDeviceName.c_str(), hashSpeed, validShares, totalAccountedForShares, totalSubmittedShares, percentage);
-		}
-
-		if (nonceResult[0] != 0x01000000 && nonceResult[0] != 0) {
-			uint32_t nonce = *nonceResult;
-			nonce = (((nonce & 0xFF000000) >> 24) | ((nonce & 0x00FF0000) >> 8) | ((nonce & 0x0000FF00) << 8) | ((nonce & 0x000000FF) << 24));
-
-			ucpClient.submitWork(jobId, timestamp, nonce);
-
-			nonceResult[0] = 0;
-
-			char line[100];
-
-			// Hash coming from GPU is reversed
-			uint64_t hashFlipped = 0;
-			hashFlipped |= (hashStart[0] & 0x00000000000000FF) << 56;
-			hashFlipped |= (hashStart[0] & 0x000000000000FF00) << 40;
-			hashFlipped |= (hashStart[0] & 0x0000000000FF0000) << 24;
-			hashFlipped |= (hashStart[0] & 0x00000000FF000000) << 8;
-			hashFlipped |= (hashStart[0] & 0x000000FF00000000) >> 8;
-			hashFlipped |= (hashStart[0] & 0x0000FF0000000000) >> 24;
-			hashFlipped |= (hashStart[0] & 0x00FF000000000000) >> 40;
-			hashFlipped |= (hashStart[0] & 0xFF00000000000000) >> 56;
-
-#if CPU_SHARES 
-			sprintf(line, "\t Share Found @ 2^24! {%#018llx} [nonce: %#08lx]", hashFlipped, nonce);
-#else
-			sprintf(line, "\t Share Found @ 2^32! {%#018llx} [nonce: %#08lx]", hashFlipped, nonce);
-#endif
-
-			cout << line << endl;
-			vprintf("Logging\n");
-			Log::info(line);
-			vprintf("Done logging\n");
-			vprintf("Made line\n");
-
-			numLines++;
+	pthread_t tids[MAX_GPUS];
+	struct mining_attr m_args[MAX_GPUS];
 
-			// Uncomment these lines to get access to this data for display purposes
-			/*
-			long long extraNonce = ucpClient.getStartExtraNonce();
-			int jobId = ucpClient.getJobId();
-			int encodedDifficulty = ucpClient.getEncodedDifficulty();
-			string previousBlockHashHex = ucpClient.getPreviousBlockHash();
-			string merkleRoot = ucpClient.getMerkleRoot();
-			*/
+	for (int i = 0; i < opt_n_threads; i++) {
+		m_args[i].host = host;
+		m_args[i].port = port;
+		m_args[i].username = username;
+		m_args[i].password = password;
+		m_args[i].dev_id = device_map[i];
 
-		}
-		vprintf("About to restart loop...\n");
+		pthread_attr_t attr;
+		pthread_attr_init(&attr);
+		pthread_create(&tids[i], &attr, miner_thread, &m_args[i]);
 	}
 
-	printf("Resetting device...\n");
-	cudaStatus = cudaDeviceReset();
-	if (cudaStatus != cudaSuccess) {
-		fprintf(stderr, "cudaDeviceReset failed!");
-		return 1;
-	}
-	printf("Done resetting device...\n");
+	pthread_join(tids[0], NULL);
 
-	getchar();
-	return 0;
 }
-
 uint32_t lastNonceStart = 0;
 
 // Grind Through vBlake nonces with the provided header, setting the resultant nonce and associated hash start if a high-difficulty solution is found
-cudaError_t grindNonces(uint32_t *nonceResult, uint64_t *hashStart, const uint64_t *header)
+void grindNonces(uint32_t startnonce, uint32_t *nonceResult, uint64_t *hashStart, const uint64_t *header, int dev_id)
 {
-	// Device memory
-	uint32_t *dev_nonceStart = 0;
-	uint64_t *dev_header = 0;
-	uint32_t *dev_nonceResult = 0;
-	uint64_t *dev_hashStart = 0;
-
-	// Ensure that nonces don't overlap previous work
-	uint32_t nonceStart = (uint64_t)lastNonceStart + (WORK_PER_THREAD * blocksize * threadsPerBlock);
-	lastNonceStart = nonceStart;
-
-	cudaError_t cudaStatus;
-
 	// Select GPU to run on
-	cudaStatus = cudaSetDevice(deviceToUse);
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaSetDevice failed!  Do you have a CUDA-capable GPU installed?");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
+	if (!init[dev_id])
+	{
+		CUDA_SAFE_CALL(cudaSetDevice(dev_id));
+		cudaDeviceReset();
+		cudaSetDeviceFlags(cudaDeviceScheduleBlockingSync);
+		cudaFuncSetCacheConfig(vblakeHasher, cudaFuncCachePreferL1);
 	// Allocate GPU buffers for nonce result and header
-	cudaStatus = cudaMalloc((void**)&dev_nonceStart, 1 * sizeof(uint32_t));
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMalloc failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
+		veri_init(dev_id);
+		init[dev_id] = 1;
 	}
-
+	
 	// Copy starting nonce to GPU
-	cudaStatus = cudaMemcpy(dev_nonceStart, &nonceStart, sizeof(uint32_t), cudaMemcpyHostToDevice);
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMemcpy failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-	// Allocate GPU buffers for nonce result and header.
-	cudaStatus = cudaMalloc((void**)&dev_nonceResult, 1 * sizeof(uint32_t));
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMalloc failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-	// Allocate GPU buffers for nonce result and header.
-	cudaStatus = cudaMalloc((void**)&dev_hashStart, 1 * sizeof(uint64_t));
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMalloc failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-	cudaStatus = cudaMalloc((void**)&dev_header, 8 * sizeof(uint64_t));
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMalloc failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-	// Copy input vectors from host memory to GPU buffers.
-	cudaStatus = cudaMemcpy(dev_header, header, 8 * sizeof(uint64_t), cudaMemcpyHostToDevice);
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMalloc failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-	cudaMemcpyToSymbol(c_512, cpu_u512, sizeof(cpu_u512), 0, cudaMemcpyHostToDevice);
-	cudaMemcpyToSymbol(c_vblake, cpu_vBlake_iv, sizeof(cpu_vBlake_iv), 0, cudaMemcpyHostToDevice);
+	
+	veri_setBlock((void*)header);
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_512, cpu_u512, sizeof(cpu_u512), 0, cudaMemcpyHostToDevice));
+	CUDA_SAFE_CALL(cudaMemcpyToSymbol(c_vblake, cpu_vBlake_iv, sizeof(cpu_vBlake_iv), 0, cudaMemcpyHostToDevice));
+	cudaMemset(d_nonces[dev_id], 0x00, 1 * sizeof(uint32_t));
+	cudaMemset(dev_nonceStart[dev_id], 0x00, 1 * sizeof(uint64_t));
 
 	// Launch a kernel on the GPU with one thread for each element.
-	vblakeHasher << < blocksize, threadsPerBlock >> >(dev_nonceStart, dev_nonceResult, dev_hashStart, dev_header);
-
-	// Check for any errors launching the kernel
-	cudaStatus = cudaGetLastError();
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "grindNonces launch failed: %s\n", cudaGetErrorString(cudaStatus));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-	// cudaDeviceSynchronize waits for the kernel to finish, and returns
-	// any errors encountered during the launch.
-	cudaStatus = cudaDeviceSynchronize();
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaDeviceSynchronize returned error code %d after launching grindNonces!\n", cudaStatus);
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-	// Copy output vector from GPU buffer to host memory.
-	cudaStatus = cudaMemcpy(nonceResult, dev_nonceResult, 1 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMemcpy failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-
-	// Copy output vector from GPU buffer to host memory.
-	cudaStatus = cudaMemcpy(hashStart, dev_hashStart, 1 * sizeof(uint64_t), cudaMemcpyDeviceToHost);
-	if (cudaStatus != cudaSuccess) {
-		sprintf(outputBuffer, "cudaMemcpy failed!");
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		cudaError_t e = cudaGetLastError();
-		sprintf(outputBuffer, "Cuda Error: %s\n", cudaGetErrorString(e));
-		cerr << outputBuffer << endl;
-		Log::error(outputBuffer);
-		goto Error;
-	}
-
-Error:
-	cudaFree(dev_nonceStart);
-	cudaFree(dev_header);
-	cudaFree(dev_nonceResult);
-	cudaFree(dev_hashStart);
-	return cudaStatus;
-}
+	vblakeHasher << < blocksize, threadsPerBlock >> >(startnonce, d_nonces[dev_id], dev_nonceStart[dev_id]);
+	cudaThreadSynchronize();
+	cudaMemcpy(nonceResult, d_nonces[dev_id], 1 * sizeof(uint32_t), cudaMemcpyDeviceToHost);
+	cudaMemcpy(hashStart, dev_nonceStart[dev_id], 1 * sizeof(uint64_t), cudaMemcpyDeviceToHost);
+};
diff --git a/libs/pthreadVC2.lib b/libs/pthreadVC2.lib
new file mode 100644
index 0000000..3e26e4a
Binary files /dev/null and b/libs/pthreadVC2.lib differ
diff --git a/pthreads/pthread.h b/pthreads/pthread.h
new file mode 100644
index 0000000..52a6874
--- /dev/null
+++ b/pthreads/pthread.h
@@ -0,0 +1,1372 @@
+/* This is an implementation of the threads API of POSIX 1003.1-2001.
+ *
+ * --------------------------------------------------------------------------
+ *
+ *      Pthreads-win32 - POSIX Threads Library for Win32
+ *      Copyright(C) 1998 John E. Bossom
+ *      Copyright(C) 1999,2005 Pthreads-win32 contributors
+ *
+ *      Contact Email: rpj@callisto.canberra.edu.au
+ *
+ *      The current list of contributors is contained
+ *      in the file CONTRIBUTORS included with the source
+ *      code distribution. The list can also be seen at the
+ *      following World Wide Web location:
+ *      http://sources.redhat.com/pthreads-win32/contributors.html
+ *
+ *      This library is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU Lesser General Public
+ *      License as published by the Free Software Foundation; either
+ *      version 2 of the License, or (at your option) any later version.
+ *
+ *      This library is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *      Lesser General Public License for more details.
+ *
+ *      You should have received a copy of the GNU Lesser General Public
+ *      License along with this library in the file COPYING.LIB;
+ *      if not, write to the Free Software Foundation, Inc.,
+ *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+
+// undef for DLLs
+#define PTW32_STATIC_LIB
+
+
+#if !defined( PTHREAD_H )
+#define PTHREAD_H
+
+/*
+ * See the README file for an explanation of the pthreads-win32 version
+ * numbering scheme and how the DLL is named etc.
+ */
+#define PTW32_VERSION 2,9,1,0
+#define PTW32_VERSION_STRING "2, 9, 1, 0\0"
+
+/* There are three implementations of cancel cleanup.
+ * Note that pthread.h is included in both application
+ * compilation units and also internally for the library.
+ * The code here and within the library aims to work
+ * for all reasonable combinations of environments.
+ *
+ * The three implementations are:
+ *
+ *   WIN32 SEH
+ *   C
+ *   C++
+ *
+ * Please note that exiting a push/pop block via
+ * "return", "exit", "break", or "continue" will
+ * lead to different behaviour amongst applications
+ * depending upon whether the library was built
+ * using SEH, C++, or C. For example, a library built
+ * with SEH will call the cleanup routine, while both
+ * C++ and C built versions will not.
+ */
+
+/*
+ * Define defaults for cleanup code.
+ * Note: Unless the build explicitly defines one of the following, then
+ * we default to standard C style cleanup. This style uses setjmp/longjmp
+ * in the cancelation and thread exit implementations and therefore won't
+ * do stack unwinding if linked to applications that have it (e.g.
+ * C++ apps). This is currently consistent with most/all commercial Unix
+ * POSIX threads implementations.
+ */
+#if !defined( __CLEANUP_SEH ) && !defined( __CLEANUP_CXX ) && !defined( __CLEANUP_C )
+# define __CLEANUP_C
+#endif
+
+#if defined( __CLEANUP_SEH ) && ( !defined( _MSC_VER ) && !defined(PTW32_RC_MSC))
+#error ERROR [__FILE__, line __LINE__]: SEH is not supported for this compiler.
+#endif
+
+/*
+ * Stop here if we are being included by the resource compiler.
+ */
+#if !defined(RC_INVOKED)
+
+#undef PTW32_LEVEL
+
+#if defined(_POSIX_SOURCE)
+#define PTW32_LEVEL 0
+/* Early POSIX */
+#endif
+
+#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309
+#undef PTW32_LEVEL
+#define PTW32_LEVEL 1
+/* Include 1b, 1c and 1d */
+#endif
+
+#if defined(INCLUDE_NP)
+#undef PTW32_LEVEL
+#define PTW32_LEVEL 2
+/* Include Non-Portable extensions */
+#endif
+
+#define PTW32_LEVEL_MAX 3
+
+#if ( defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112 )  || !defined(PTW32_LEVEL)
+#define PTW32_LEVEL PTW32_LEVEL_MAX
+/* Include everything */
+#endif
+
+#if defined(_UWIN)
+#   define HAVE_STRUCT_TIMESPEC 1
+#   define HAVE_SIGNAL_H        1
+#   undef HAVE_PTW32_CONFIG_H
+#   pragma comment(lib, "pthread")
+#endif
+
+/*
+ * -------------------------------------------------------------
+ *
+ *
+ * Module: pthread.h
+ *
+ * Purpose:
+ *      Provides an implementation of PThreads based upon the
+ *      standard:
+ *
+ *              POSIX 1003.1-2001
+ *  and
+ *    The Single Unix Specification version 3
+ *
+ *    (these two are equivalent)
+ *
+ *      in order to enhance code portability between Windows,
+ *  various commercial Unix implementations, and Linux.
+ *
+ *      See the ANNOUNCE file for a full list of conforming
+ *      routines and defined constants, and a list of missing
+ *      routines and constants not defined in this implementation.
+ *
+ * Authors:
+ *      There have been many contributors to this library.
+ *      The initial implementation was contributed by
+ *      John Bossom, and several others have provided major
+ *      sections or revisions of parts of the implementation.
+ *      Often significant effort has been contributed to
+ *      find and fix important bugs and other problems to
+ *      improve the reliability of the library, which sometimes
+ *      is not reflected in the amount of code which changed as
+ *      result.
+ *      As much as possible, the contributors are acknowledged
+ *      in the ChangeLog file in the source code distribution
+ *      where their changes are noted in detail.
+ *
+ *      Contributors are listed in the CONTRIBUTORS file.
+ *
+ *      As usual, all bouquets go to the contributors, and all
+ *      brickbats go to the project maintainer.
+ *
+ * Maintainer:
+ *      The code base for this project is coordinated and
+ *      eventually pre-tested, packaged, and made available by
+ *
+ *              Ross Johnson <rpj@callisto.canberra.edu.au>
+ *
+ * QA Testers:
+ *      Ultimately, the library is tested in the real world by
+ *      a host of competent and demanding scientists and
+ *      engineers who report bugs and/or provide solutions
+ *      which are then fixed or incorporated into subsequent
+ *      versions of the library. Each time a bug is fixed, a
+ *      test case is written to prove the fix and ensure
+ *      that later changes to the code don't reintroduce the
+ *      same error. The number of test cases is slowly growing
+ *      and therefore so is the code reliability.
+ *
+ * Compliance:
+ *      See the file ANNOUNCE for the list of implemented
+ *      and not-implemented routines and defined options.
+ *      Of course, these are all defined is this file as well.
+ *
+ * Web site:
+ *      The source code and other information about this library
+ *      are available from
+ *
+ *              http://sources.redhat.com/pthreads-win32/
+ *
+ * -------------------------------------------------------------
+ */
+
+/* Try to avoid including windows.h */
+#if (defined(__MINGW64__) || defined(__MINGW32__)) && defined(__cplusplus)
+#define PTW32_INCLUDE_WINDOWS_H
+#endif
+
+#if defined(PTW32_INCLUDE_WINDOWS_H)
+#include <windows.h>
+#endif
+
+#if defined(_MSC_VER) && _MSC_VER < 1300 || defined(__DMC__)
+/*
+ * VC++6.0 or early compiler's header has no DWORD_PTR type.
+ */
+typedef unsigned long DWORD_PTR;
+typedef unsigned long ULONG_PTR;
+#endif
+/*
+ * -----------------
+ * autoconf switches
+ * -----------------
+ */
+
+#if defined(HAVE_PTW32_CONFIG_H)
+#include "config.h"
+#endif /* HAVE_PTW32_CONFIG_H */
+
+#if !defined(NEED_FTIME)
+#include <time.h>
+#else /* NEED_FTIME */
+/* use native WIN32 time API */
+#endif /* NEED_FTIME */
+
+#if defined(HAVE_SIGNAL_H)
+#include <signal.h>
+#endif /* HAVE_SIGNAL_H */
+
+#include <limits.h>
+
+/*
+ * Boolean values to make us independent of system includes.
+ */
+enum {
+  PTW32_FALSE = 0,
+  PTW32_TRUE = (! PTW32_FALSE)
+};
+
+/*
+ * This is a duplicate of what is in the autoconf config.h,
+ * which is only used when building the pthread-win32 libraries.
+ */
+
+#if !defined(PTW32_CONFIG_H)
+#  if defined(WINCE)
+#    define NEED_ERRNO
+#    define NEED_SEM
+#  endif
+#  if defined(__MINGW64__)
+#    define HAVE_STRUCT_TIMESPEC
+#    define HAVE_MODE_T
+#  elif defined(_UWIN) || defined(__MINGW32__)
+#    define HAVE_MODE_T
+#  endif
+#endif
+
+/*
+ *
+ */
+
+#if PTW32_LEVEL >= PTW32_LEVEL_MAX
+#if defined(NEED_ERRNO)
+#include "need_errno.h"
+#else
+#include <errno.h>
+#endif
+#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
+
+/*
+ * Several systems don't define some error numbers.
+ */
+#if !defined(ENOTSUP)
+#  define ENOTSUP 48   /* This is the value in Solaris. */
+#endif
+
+#if !defined(ETIMEDOUT)
+#  define ETIMEDOUT 10060 /* Same as WSAETIMEDOUT */
+#endif
+
+#if !defined(ENOSYS)
+#  define ENOSYS 140     /* Semi-arbitrary value */
+#endif
+
+#if !defined(EDEADLK)
+#  if defined(EDEADLOCK)
+#    define EDEADLK EDEADLOCK
+#  else
+#    define EDEADLK 36     /* This is the value in MSVC. */
+#  endif
+#endif
+
+/* POSIX 2008 - related to robust mutexes */
+#if !defined(EOWNERDEAD)
+#  define EOWNERDEAD 43
+#endif
+#if !defined(ENOTRECOVERABLE)
+#  define ENOTRECOVERABLE 44
+#endif
+
+#include "./pthreads/sched.h"
+
+/*
+ * To avoid including windows.h we define only those things that we
+ * actually need from it.
+ */
+#if !defined(PTW32_INCLUDE_WINDOWS_H)
+#if !defined(HANDLE)
+# define PTW32__HANDLE_DEF
+# define HANDLE void *
+#endif
+#if !defined(DWORD)
+# define PTW32__DWORD_DEF
+# define DWORD unsigned long
+#endif
+#endif
+
+#if !defined(HAVE_STRUCT_TIMESPEC)
+#define HAVE_STRUCT_TIMESPEC
+#if !defined(_TIMESPEC_DEFINED)
+#define _TIMESPEC_DEFINED
+struct timespec {
+        time_t tv_sec;
+        long tv_nsec;
+};
+#endif /* _TIMESPEC_DEFINED */
+#endif /* HAVE_STRUCT_TIMESPEC */
+
+#if !defined(SIG_BLOCK)
+#define SIG_BLOCK 0
+#endif /* SIG_BLOCK */
+
+#if !defined(SIG_UNBLOCK)
+#define SIG_UNBLOCK 1
+#endif /* SIG_UNBLOCK */
+
+#if !defined(SIG_SETMASK)
+#define SIG_SETMASK 2
+#endif /* SIG_SETMASK */
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif                          /* __cplusplus */
+
+/*
+ * -------------------------------------------------------------
+ *
+ * POSIX 1003.1-2001 Options
+ * =========================
+ *
+ * Options are normally set in <unistd.h>, which is not provided
+ * with pthreads-win32.
+ *
+ * For conformance with the Single Unix Specification (version 3), all of the
+ * options below are defined, and have a value of either -1 (not supported)
+ * or 200112L (supported).
+ *
+ * These options can neither be left undefined nor have a value of 0, because
+ * either indicates that sysconf(), which is not implemented, may be used at
+ * runtime to check the status of the option.
+ *
+ * _POSIX_THREADS (== 200112L)
+ *                      If == 200112L, you can use threads
+ *
+ * _POSIX_THREAD_ATTR_STACKSIZE (== 200112L)
+ *                      If == 200112L, you can control the size of a thread's
+ *                      stack
+ *                              pthread_attr_getstacksize
+ *                              pthread_attr_setstacksize
+ *
+ * _POSIX_THREAD_ATTR_STACKADDR (== -1)
+ *                      If == 200112L, you can allocate and control a thread's
+ *                      stack. If not supported, the following functions
+ *                      will return ENOSYS, indicating they are not
+ *                      supported:
+ *                              pthread_attr_getstackaddr
+ *                              pthread_attr_setstackaddr
+ *
+ * _POSIX_THREAD_PRIORITY_SCHEDULING (== -1)
+ *                      If == 200112L, you can use realtime scheduling.
+ *                      This option indicates that the behaviour of some
+ *                      implemented functions conforms to the additional TPS
+ *                      requirements in the standard. E.g. rwlocks favour
+ *                      writers over readers when threads have equal priority.
+ *
+ * _POSIX_THREAD_PRIO_INHERIT (== -1)
+ *                      If == 200112L, you can create priority inheritance
+ *                      mutexes.
+ *                              pthread_mutexattr_getprotocol +
+ *                              pthread_mutexattr_setprotocol +
+ *
+ * _POSIX_THREAD_PRIO_PROTECT (== -1)
+ *                      If == 200112L, you can create priority ceiling mutexes
+ *                      Indicates the availability of:
+ *                              pthread_mutex_getprioceiling
+ *                              pthread_mutex_setprioceiling
+ *                              pthread_mutexattr_getprioceiling
+ *                              pthread_mutexattr_getprotocol     +
+ *                              pthread_mutexattr_setprioceiling
+ *                              pthread_mutexattr_setprotocol     +
+ *
+ * _POSIX_THREAD_PROCESS_SHARED (== -1)
+ *                      If set, you can create mutexes and condition
+ *                      variables that can be shared with another
+ *                      process.If set, indicates the availability
+ *                      of:
+ *                              pthread_mutexattr_getpshared
+ *                              pthread_mutexattr_setpshared
+ *                              pthread_condattr_getpshared
+ *                              pthread_condattr_setpshared
+ *
+ * _POSIX_THREAD_SAFE_FUNCTIONS (== 200112L)
+ *                      If == 200112L you can use the special *_r library
+ *                      functions that provide thread-safe behaviour
+ *
+ * _POSIX_READER_WRITER_LOCKS (== 200112L)
+ *                      If == 200112L, you can use read/write locks
+ *
+ * _POSIX_SPIN_LOCKS (== 200112L)
+ *                      If == 200112L, you can use spin locks
+ *
+ * _POSIX_BARRIERS (== 200112L)
+ *                      If == 200112L, you can use barriers
+ *
+ *      + These functions provide both 'inherit' and/or
+ *        'protect' protocol, based upon these macro
+ *        settings.
+ *
+ * -------------------------------------------------------------
+ */
+
+/*
+ * POSIX Options
+ */
+#undef _POSIX_THREADS
+#define _POSIX_THREADS 200809L
+
+#undef _POSIX_READER_WRITER_LOCKS
+#define _POSIX_READER_WRITER_LOCKS 200809L
+
+#undef _POSIX_SPIN_LOCKS
+#define _POSIX_SPIN_LOCKS 200809L
+
+#undef _POSIX_BARRIERS
+#define _POSIX_BARRIERS 200809L
+
+#undef _POSIX_THREAD_SAFE_FUNCTIONS
+#define _POSIX_THREAD_SAFE_FUNCTIONS 200809L
+
+#undef _POSIX_THREAD_ATTR_STACKSIZE
+#define _POSIX_THREAD_ATTR_STACKSIZE 200809L
+
+/*
+ * The following options are not supported
+ */
+#undef _POSIX_THREAD_ATTR_STACKADDR
+#define _POSIX_THREAD_ATTR_STACKADDR -1
+
+#undef _POSIX_THREAD_PRIO_INHERIT
+#define _POSIX_THREAD_PRIO_INHERIT -1
+
+#undef _POSIX_THREAD_PRIO_PROTECT
+#define _POSIX_THREAD_PRIO_PROTECT -1
+
+/* TPS is not fully supported.  */
+#undef _POSIX_THREAD_PRIORITY_SCHEDULING
+#define _POSIX_THREAD_PRIORITY_SCHEDULING -1
+
+#undef _POSIX_THREAD_PROCESS_SHARED
+#define _POSIX_THREAD_PROCESS_SHARED -1
+
+
+/*
+ * POSIX 1003.1-2001 Limits
+ * ===========================
+ *
+ * These limits are normally set in <limits.h>, which is not provided with
+ * pthreads-win32.
+ *
+ * PTHREAD_DESTRUCTOR_ITERATIONS
+ *                      Maximum number of attempts to destroy
+ *                      a thread's thread-specific data on
+ *                      termination (must be at least 4)
+ *
+ * PTHREAD_KEYS_MAX
+ *                      Maximum number of thread-specific data keys
+ *                      available per process (must be at least 128)
+ *
+ * PTHREAD_STACK_MIN
+ *                      Minimum supported stack size for a thread
+ *
+ * PTHREAD_THREADS_MAX
+ *                      Maximum number of threads supported per
+ *                      process (must be at least 64).
+ *
+ * SEM_NSEMS_MAX
+ *                      The maximum number of semaphores a process can have.
+ *                      (must be at least 256)
+ *
+ * SEM_VALUE_MAX
+ *                      The maximum value a semaphore can have.
+ *                      (must be at least 32767)
+ *
+ */
+#undef _POSIX_THREAD_DESTRUCTOR_ITERATIONS
+#define _POSIX_THREAD_DESTRUCTOR_ITERATIONS     4
+
+#undef PTHREAD_DESTRUCTOR_ITERATIONS
+#define PTHREAD_DESTRUCTOR_ITERATIONS           _POSIX_THREAD_DESTRUCTOR_ITERATIONS
+
+#undef _POSIX_THREAD_KEYS_MAX
+#define _POSIX_THREAD_KEYS_MAX                  128
+
+#undef PTHREAD_KEYS_MAX
+#define PTHREAD_KEYS_MAX                        _POSIX_THREAD_KEYS_MAX
+
+#undef PTHREAD_STACK_MIN
+#define PTHREAD_STACK_MIN                       0
+
+#undef _POSIX_THREAD_THREADS_MAX
+#define _POSIX_THREAD_THREADS_MAX               64
+
+  /* Arbitrary value */
+#undef PTHREAD_THREADS_MAX
+#define PTHREAD_THREADS_MAX                     2019
+
+#undef _POSIX_SEM_NSEMS_MAX
+#define _POSIX_SEM_NSEMS_MAX                    256
+
+  /* Arbitrary value */
+#undef SEM_NSEMS_MAX
+#define SEM_NSEMS_MAX                           1024
+
+#undef _POSIX_SEM_VALUE_MAX
+#define _POSIX_SEM_VALUE_MAX                    32767
+
+#undef SEM_VALUE_MAX
+#define SEM_VALUE_MAX                           INT_MAX
+
+
+#if defined(__GNUC__) && !defined(__declspec)
+# error Please upgrade your GNU compiler to one that supports __declspec.
+#endif
+
+/*
+ * When building the library, you should define PTW32_BUILD so that
+ * the variables/functions are exported correctly. When using the library,
+ * do NOT define PTW32_BUILD, and then the variables/functions will
+ * be imported correctly.
+ */
+#if !defined(PTW32_STATIC_LIB)
+#  if defined(PTW32_BUILD)
+#    define PTW32_DLLPORT __declspec (dllexport)
+#  else
+#    define PTW32_DLLPORT __declspec (dllimport)
+#  endif
+#else
+#  define PTW32_DLLPORT
+#endif
+
+/*
+ * The Open Watcom C/C++ compiler uses a non-standard calling convention
+ * that passes function args in registers unless __cdecl is explicitly specified
+ * in exposed function prototypes.
+ *
+ * We force all calls to cdecl even though this could slow Watcom code down
+ * slightly. If you know that the Watcom compiler will be used to build both
+ * the DLL and application, then you can probably define this as a null string.
+ * Remember that pthread.h (this file) is used for both the DLL and application builds.
+ */
+#define PTW32_CDECL __cdecl
+
+#if defined(_UWIN) && PTW32_LEVEL >= PTW32_LEVEL_MAX
+#   include     <sys/types.h>
+#else
+/*
+ * Generic handle type - intended to extend uniqueness beyond
+ * that available with a simple pointer. It should scale for either
+ * IA-32 or IA-64.
+ */
+typedef struct {
+    void * p;                   /* Pointer to actual object */
+    unsigned int x;             /* Extra information - reuse count etc */
+} ptw32_handle_t;
+
+typedef ptw32_handle_t pthread_t;
+typedef struct pthread_attr_t_ * pthread_attr_t;
+typedef struct pthread_once_t_ pthread_once_t;
+typedef struct pthread_key_t_ * pthread_key_t;
+typedef struct pthread_mutex_t_ * pthread_mutex_t;
+typedef struct pthread_mutexattr_t_ * pthread_mutexattr_t;
+typedef struct pthread_cond_t_ * pthread_cond_t;
+typedef struct pthread_condattr_t_ * pthread_condattr_t;
+#endif
+typedef struct pthread_rwlock_t_ * pthread_rwlock_t;
+typedef struct pthread_rwlockattr_t_ * pthread_rwlockattr_t;
+typedef struct pthread_spinlock_t_ * pthread_spinlock_t;
+typedef struct pthread_barrier_t_ * pthread_barrier_t;
+typedef struct pthread_barrierattr_t_ * pthread_barrierattr_t;
+
+/*
+ * ====================
+ * ====================
+ * POSIX Threads
+ * ====================
+ * ====================
+ */
+
+enum {
+/*
+ * pthread_attr_{get,set}detachstate
+ */
+  PTHREAD_CREATE_JOINABLE       = 0,  /* Default */
+  PTHREAD_CREATE_DETACHED       = 1,
+
+/*
+ * pthread_attr_{get,set}inheritsched
+ */
+  PTHREAD_INHERIT_SCHED         = 0,
+  PTHREAD_EXPLICIT_SCHED        = 1,  /* Default */
+
+/*
+ * pthread_{get,set}scope
+ */
+  PTHREAD_SCOPE_PROCESS         = 0,
+  PTHREAD_SCOPE_SYSTEM          = 1,  /* Default */
+
+/*
+ * pthread_setcancelstate paramters
+ */
+  PTHREAD_CANCEL_ENABLE         = 0,  /* Default */
+  PTHREAD_CANCEL_DISABLE        = 1,
+
+/*
+ * pthread_setcanceltype parameters
+ */
+  PTHREAD_CANCEL_ASYNCHRONOUS   = 0,
+  PTHREAD_CANCEL_DEFERRED       = 1,  /* Default */
+
+/*
+ * pthread_mutexattr_{get,set}pshared
+ * pthread_condattr_{get,set}pshared
+ */
+  PTHREAD_PROCESS_PRIVATE       = 0,
+  PTHREAD_PROCESS_SHARED        = 1,
+
+/*
+ * pthread_mutexattr_{get,set}robust
+ */
+  PTHREAD_MUTEX_STALLED         = 0,  /* Default */
+  PTHREAD_MUTEX_ROBUST          = 1,
+
+/*
+ * pthread_barrier_wait
+ */
+  PTHREAD_BARRIER_SERIAL_THREAD = -1
+};
+
+/*
+ * ====================
+ * ====================
+ * Cancelation
+ * ====================
+ * ====================
+ */
+#define PTHREAD_CANCELED       ((void *)(size_t) -1)
+
+
+/*
+ * ====================
+ * ====================
+ * Once Key
+ * ====================
+ * ====================
+ */
+#define PTHREAD_ONCE_INIT       { PTW32_FALSE, 0, 0, 0}
+
+struct pthread_once_t_
+{
+  int          done;        /* indicates if user function has been executed */
+  void *       lock;
+  int          reserved1;
+  int          reserved2;
+};
+
+
+/*
+ * ====================
+ * ====================
+ * Object initialisers
+ * ====================
+ * ====================
+ */
+#define PTHREAD_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -1)
+#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -2)
+#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER ((pthread_mutex_t)(size_t) -3)
+
+/*
+ * Compatibility with LinuxThreads
+ */
+#define PTHREAD_RECURSIVE_MUTEX_INITIALIZER_NP PTHREAD_RECURSIVE_MUTEX_INITIALIZER
+#define PTHREAD_ERRORCHECK_MUTEX_INITIALIZER_NP PTHREAD_ERRORCHECK_MUTEX_INITIALIZER
+
+#define PTHREAD_COND_INITIALIZER ((pthread_cond_t)(size_t) -1)
+
+#define PTHREAD_RWLOCK_INITIALIZER ((pthread_rwlock_t)(size_t) -1)
+
+#define PTHREAD_SPINLOCK_INITIALIZER ((pthread_spinlock_t)(size_t) -1)
+
+
+/*
+ * Mutex types.
+ */
+enum
+{
+  /* Compatibility with LinuxThreads */
+  PTHREAD_MUTEX_FAST_NP,
+  PTHREAD_MUTEX_RECURSIVE_NP,
+  PTHREAD_MUTEX_ERRORCHECK_NP,
+  PTHREAD_MUTEX_TIMED_NP = PTHREAD_MUTEX_FAST_NP,
+  PTHREAD_MUTEX_ADAPTIVE_NP = PTHREAD_MUTEX_FAST_NP,
+  /* For compatibility with POSIX */
+  PTHREAD_MUTEX_NORMAL = PTHREAD_MUTEX_FAST_NP,
+  PTHREAD_MUTEX_RECURSIVE = PTHREAD_MUTEX_RECURSIVE_NP,
+  PTHREAD_MUTEX_ERRORCHECK = PTHREAD_MUTEX_ERRORCHECK_NP,
+  PTHREAD_MUTEX_DEFAULT = PTHREAD_MUTEX_NORMAL
+};
+
+
+typedef struct ptw32_cleanup_t ptw32_cleanup_t;
+
+#if defined(_MSC_VER)
+/* Disable MSVC 'anachronism used' warning */
+#pragma warning( disable : 4229 )
+#endif
+
+typedef void (* PTW32_CDECL ptw32_cleanup_callback_t)(void *);
+
+#if defined(_MSC_VER)
+#pragma warning( default : 4229 )
+#endif
+
+struct ptw32_cleanup_t
+{
+  ptw32_cleanup_callback_t routine;
+  void *arg;
+  struct ptw32_cleanup_t *prev;
+};
+
+#if defined(__CLEANUP_SEH)
+        /*
+         * WIN32 SEH version of cancel cleanup.
+         */
+
+#define pthread_cleanup_push( _rout, _arg ) \
+        { \
+            ptw32_cleanup_t     _cleanup; \
+            \
+        _cleanup.routine        = (ptw32_cleanup_callback_t)(_rout); \
+            _cleanup.arg        = (_arg); \
+            __try \
+              { \
+
+#define pthread_cleanup_pop( _execute ) \
+              } \
+            __finally \
+                { \
+                    if( _execute || AbnormalTermination()) \
+                      { \
+                          (*(_cleanup.routine))( _cleanup.arg ); \
+                      } \
+                } \
+        }
+
+#else /* __CLEANUP_SEH */
+
+#if defined(__CLEANUP_C)
+
+        /*
+         * C implementation of PThreads cancel cleanup
+         */
+
+#define pthread_cleanup_push( _rout, _arg ) \
+        { \
+            ptw32_cleanup_t     _cleanup; \
+            \
+            ptw32_push_cleanup( &_cleanup, (ptw32_cleanup_callback_t) (_rout), (_arg) ); \
+
+#define pthread_cleanup_pop( _execute ) \
+            (void) ptw32_pop_cleanup( _execute ); \
+        }
+
+#else /* __CLEANUP_C */
+
+#if defined(__CLEANUP_CXX)
+
+        /*
+         * C++ version of cancel cleanup.
+         * - John E. Bossom.
+         */
+
+        class PThreadCleanup {
+          /*
+           * PThreadCleanup
+           *
+           * Purpose
+           *      This class is a C++ helper class that is
+           *      used to implement pthread_cleanup_push/
+           *      pthread_cleanup_pop.
+           *      The destructor of this class automatically
+           *      pops the pushed cleanup routine regardless
+           *      of how the code exits the scope
+           *      (i.e. such as by an exception)
+           */
+      ptw32_cleanup_callback_t cleanUpRout;
+          void    *       obj;
+          int             executeIt;
+
+        public:
+          PThreadCleanup() :
+            cleanUpRout( 0 ),
+            obj( 0 ),
+            executeIt( 0 )
+            /*
+             * No cleanup performed
+             */
+            {
+            }
+
+          PThreadCleanup(
+             ptw32_cleanup_callback_t routine,
+                         void    *       arg ) :
+            cleanUpRout( routine ),
+            obj( arg ),
+            executeIt( 1 )
+            /*
+             * Registers a cleanup routine for 'arg'
+             */
+            {
+            }
+
+          ~PThreadCleanup()
+            {
+              if ( executeIt && ((void *) cleanUpRout != (void *) 0) )
+                {
+                  (void) (*cleanUpRout)( obj );
+                }
+            }
+
+          void execute( int exec )
+            {
+              executeIt = exec;
+            }
+        };
+
+        /*
+         * C++ implementation of PThreads cancel cleanup;
+         * This implementation takes advantage of a helper
+         * class who's destructor automatically calls the
+         * cleanup routine if we exit our scope weirdly
+         */
+#define pthread_cleanup_push( _rout, _arg ) \
+        { \
+            PThreadCleanup  cleanup((ptw32_cleanup_callback_t)(_rout), \
+                                    (void *) (_arg) );
+
+#define pthread_cleanup_pop( _execute ) \
+            cleanup.execute( _execute ); \
+        }
+
+#else
+
+#error ERROR [__FILE__, line __LINE__]: Cleanup type undefined.
+
+#endif /* __CLEANUP_CXX */
+
+#endif /* __CLEANUP_C */
+
+#endif /* __CLEANUP_SEH */
+
+/*
+ * ===============
+ * ===============
+ * Methods
+ * ===============
+ * ===============
+ */
+
+/*
+ * PThread Attribute Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_init (pthread_attr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_destroy (pthread_attr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_getdetachstate (const pthread_attr_t * attr,
+                                         int *detachstate);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstackaddr (const pthread_attr_t * attr,
+                                       void **stackaddr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_getstacksize (const pthread_attr_t * attr,
+                                       size_t * stacksize);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_setdetachstate (pthread_attr_t * attr,
+                                         int detachstate);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstackaddr (pthread_attr_t * attr,
+                                       void *stackaddr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_setstacksize (pthread_attr_t * attr,
+                                       size_t stacksize);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedparam (const pthread_attr_t *attr,
+                                        struct sched_param *param);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedparam (pthread_attr_t *attr,
+                                        const struct sched_param *param);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_setschedpolicy (pthread_attr_t *,
+                                         int);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_getschedpolicy (const pthread_attr_t *,
+                                         int *);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_setinheritsched(pthread_attr_t * attr,
+                                         int inheritsched);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_getinheritsched(const pthread_attr_t * attr,
+                                         int * inheritsched);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_setscope (pthread_attr_t *,
+                                   int);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_attr_getscope (const pthread_attr_t *,
+                                   int *);
+
+/*
+ * PThread Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_create (pthread_t * tid,
+                            const pthread_attr_t * attr,
+                            void *(PTW32_CDECL *start) (void *),
+                            void *arg);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_detach (pthread_t tid);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_equal (pthread_t t1,
+                           pthread_t t2);
+
+PTW32_DLLPORT void PTW32_CDECL pthread_exit (void *value_ptr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_join (pthread_t thread,
+                          void **value_ptr);
+
+PTW32_DLLPORT pthread_t PTW32_CDECL pthread_self (void);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_cancel (pthread_t thread);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_setcancelstate (int state,
+                                    int *oldstate);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_setcanceltype (int type,
+                                   int *oldtype);
+
+PTW32_DLLPORT void PTW32_CDECL pthread_testcancel (void);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_once (pthread_once_t * once_control,
+                          void (PTW32_CDECL *init_routine) (void));
+
+#if PTW32_LEVEL >= PTW32_LEVEL_MAX
+PTW32_DLLPORT ptw32_cleanup_t * PTW32_CDECL ptw32_pop_cleanup (int execute);
+
+PTW32_DLLPORT void PTW32_CDECL ptw32_push_cleanup (ptw32_cleanup_t * cleanup,
+                                 ptw32_cleanup_callback_t routine,
+                                 void *arg);
+#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
+
+/*
+ * Thread Specific Data Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_key_create (pthread_key_t * key,
+                                void (PTW32_CDECL *destructor) (void *));
+
+PTW32_DLLPORT int PTW32_CDECL pthread_key_delete (pthread_key_t key);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_setspecific (pthread_key_t key,
+                                 const void *value);
+
+PTW32_DLLPORT void * PTW32_CDECL pthread_getspecific (pthread_key_t key);
+
+
+/*
+ * Mutex Attribute Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_init (pthread_mutexattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_destroy (pthread_mutexattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getpshared (const pthread_mutexattr_t
+                                          * attr,
+                                          int *pshared);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setpshared (pthread_mutexattr_t * attr,
+                                          int pshared);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_settype (pthread_mutexattr_t * attr, int kind);
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_gettype (const pthread_mutexattr_t * attr, int *kind);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setrobust(
+                                           pthread_mutexattr_t *attr,
+                                           int robust);
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getrobust(
+                                           const pthread_mutexattr_t * attr,
+                                           int * robust);
+
+/*
+ * Barrier Attribute Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_init (pthread_barrierattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_destroy (pthread_barrierattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_getpshared (const pthread_barrierattr_t
+                                            * attr,
+                                            int *pshared);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_barrierattr_setpshared (pthread_barrierattr_t * attr,
+                                            int pshared);
+
+/*
+ * Mutex Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_mutex_init (pthread_mutex_t * mutex,
+                                const pthread_mutexattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutex_destroy (pthread_mutex_t * mutex);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutex_lock (pthread_mutex_t * mutex);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutex_timedlock(pthread_mutex_t * mutex,
+                                    const struct timespec *abstime);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutex_trylock (pthread_mutex_t * mutex);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutex_unlock (pthread_mutex_t * mutex);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_mutex_consistent (pthread_mutex_t * mutex);
+
+/*
+ * Spinlock Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_spin_init (pthread_spinlock_t * lock, int pshared);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_spin_destroy (pthread_spinlock_t * lock);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_spin_lock (pthread_spinlock_t * lock);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_spin_trylock (pthread_spinlock_t * lock);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_spin_unlock (pthread_spinlock_t * lock);
+
+/*
+ * Barrier Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_barrier_init (pthread_barrier_t * barrier,
+                                  const pthread_barrierattr_t * attr,
+                                  unsigned int count);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_barrier_destroy (pthread_barrier_t * barrier);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_barrier_wait (pthread_barrier_t * barrier);
+
+/*
+ * Condition Variable Attribute Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_condattr_init (pthread_condattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_condattr_destroy (pthread_condattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_condattr_getpshared (const pthread_condattr_t * attr,
+                                         int *pshared);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_condattr_setpshared (pthread_condattr_t * attr,
+                                         int pshared);
+
+/*
+ * Condition Variable Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_cond_init (pthread_cond_t * cond,
+                               const pthread_condattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_cond_destroy (pthread_cond_t * cond);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_cond_wait (pthread_cond_t * cond,
+                               pthread_mutex_t * mutex);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_cond_timedwait (pthread_cond_t * cond,
+                                    pthread_mutex_t * mutex,
+                                    const struct timespec *abstime);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_cond_signal (pthread_cond_t * cond);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_cond_broadcast (pthread_cond_t * cond);
+
+/*
+ * Scheduling
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_setschedparam (pthread_t thread,
+                                   int policy,
+                                   const struct sched_param *param);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_getschedparam (pthread_t thread,
+                                   int *policy,
+                                   struct sched_param *param);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_setconcurrency (int);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_getconcurrency (void);
+
+/*
+ * Read-Write Lock Functions
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_init(pthread_rwlock_t *lock,
+                                const pthread_rwlockattr_t *attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_destroy(pthread_rwlock_t *lock);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_tryrdlock(pthread_rwlock_t *);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_trywrlock(pthread_rwlock_t *);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_rdlock(pthread_rwlock_t *lock);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedrdlock(pthread_rwlock_t *lock,
+                                       const struct timespec *abstime);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_wrlock(pthread_rwlock_t *lock);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_timedwrlock(pthread_rwlock_t *lock,
+                                       const struct timespec *abstime);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlock_unlock(pthread_rwlock_t *lock);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_init (pthread_rwlockattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_destroy (pthread_rwlockattr_t * attr);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_getpshared (const pthread_rwlockattr_t * attr,
+                                           int *pshared);
+
+PTW32_DLLPORT int PTW32_CDECL pthread_rwlockattr_setpshared (pthread_rwlockattr_t * attr,
+                                           int pshared);
+
+#if PTW32_LEVEL >= PTW32_LEVEL_MAX - 1
+
+/*
+ * Signal Functions. Should be defined in <signal.h> but MSVC and MinGW32
+ * already have signal.h that don't define these.
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_kill(pthread_t thread, int sig);
+
+/*
+ * Non-portable functions
+ */
+
+/*
+ * Compatibility with Linux.
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_setkind_np(pthread_mutexattr_t * attr,
+                                         int kind);
+PTW32_DLLPORT int PTW32_CDECL pthread_mutexattr_getkind_np(pthread_mutexattr_t * attr,
+                                         int *kind);
+
+/*
+ * Possibly supported by other POSIX threads implementations
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_delay_np (struct timespec * interval);
+PTW32_DLLPORT int PTW32_CDECL pthread_num_processors_np(void);
+PTW32_DLLPORT unsigned __int64 PTW32_CDECL pthread_getunique_np(pthread_t thread);
+
+/*
+ * Useful if an application wants to statically link
+ * the lib rather than load the DLL at run-time.
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_attach_np(void);
+PTW32_DLLPORT int PTW32_CDECL pthread_win32_process_detach_np(void);
+PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_attach_np(void);
+PTW32_DLLPORT int PTW32_CDECL pthread_win32_thread_detach_np(void);
+
+/*
+ * Features that are auto-detected at load/run time.
+ */
+PTW32_DLLPORT int PTW32_CDECL pthread_win32_test_features_np(int);
+enum ptw32_features {
+  PTW32_SYSTEM_INTERLOCKED_COMPARE_EXCHANGE = 0x0001, /* System provides it. */
+  PTW32_ALERTABLE_ASYNC_CANCEL              = 0x0002  /* Can cancel blocked threads. */
+};
+
+/*
+ * Register a system time change with the library.
+ * Causes the library to perform various functions
+ * in response to the change. Should be called whenever
+ * the application's top level window receives a
+ * WM_TIMECHANGE message. It can be passed directly to
+ * pthread_create() as a new thread if desired.
+ */
+PTW32_DLLPORT void * PTW32_CDECL pthread_timechange_handler_np(void *);
+
+#endif /*PTW32_LEVEL >= PTW32_LEVEL_MAX - 1 */
+
+#if PTW32_LEVEL >= PTW32_LEVEL_MAX
+
+/*
+ * Returns the Win32 HANDLE for the POSIX thread.
+ */
+PTW32_DLLPORT HANDLE PTW32_CDECL pthread_getw32threadhandle_np(pthread_t thread);
+/*
+ * Returns the win32 thread ID for POSIX thread.
+ */
+PTW32_DLLPORT DWORD PTW32_CDECL pthread_getw32threadid_np (pthread_t thread);
+
+
+/*
+ * Protected Methods
+ *
+ * This function blocks until the given WIN32 handle
+ * is signaled or pthread_cancel had been called.
+ * This function allows the caller to hook into the
+ * PThreads cancel mechanism. It is implemented using
+ *
+ *              WaitForMultipleObjects
+ *
+ * on 'waitHandle' and a manually reset WIN32 Event
+ * used to implement pthread_cancel. The 'timeout'
+ * argument to TimedWait is simply passed to
+ * WaitForMultipleObjects.
+ */
+PTW32_DLLPORT int PTW32_CDECL pthreadCancelableWait (HANDLE waitHandle);
+PTW32_DLLPORT int PTW32_CDECL pthreadCancelableTimedWait (HANDLE waitHandle,
+                                        DWORD timeout);
+
+#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
+
+/*
+ * Thread-Safe C Runtime Library Mappings.
+ */
+#if !defined(_UWIN)
+#  if defined(NEED_ERRNO)
+     PTW32_DLLPORT int * PTW32_CDECL _errno( void );
+#  else
+#    if !defined(errno)
+#      if (defined(_MT) || defined(_DLL))
+         __declspec(dllimport) extern int * __cdecl _errno(void);
+#        define errno   (*_errno())
+#      endif
+#    endif
+#  endif
+#endif
+
+/*
+ * Some compiler environments don't define some things.
+ */
+#if defined(__BORLANDC__)
+#  define _ftime ftime
+#  define _timeb timeb
+#endif
+
+#if defined(__cplusplus)
+
+/*
+ * Internal exceptions
+ */
+class ptw32_exception {};
+class ptw32_exception_cancel : public ptw32_exception {};
+class ptw32_exception_exit   : public ptw32_exception {};
+
+#endif
+
+#if PTW32_LEVEL >= PTW32_LEVEL_MAX
+
+/* FIXME: This is only required if the library was built using SEH */
+/*
+ * Get internal SEH tag
+ */
+PTW32_DLLPORT DWORD PTW32_CDECL ptw32_get_exception_services_code(void);
+
+#endif /* PTW32_LEVEL >= PTW32_LEVEL_MAX */
+
+#if !defined(PTW32_BUILD)
+
+#if defined(__CLEANUP_SEH)
+
+/*
+ * Redefine the SEH __except keyword to ensure that applications
+ * propagate our internal exceptions up to the library's internal handlers.
+ */
+#define __except( E ) \
+        __except( ( GetExceptionCode() == ptw32_get_exception_services_code() ) \
+                 ? EXCEPTION_CONTINUE_SEARCH : ( E ) )
+
+#endif /* __CLEANUP_SEH */
+
+#if defined(__CLEANUP_CXX)
+
+/*
+ * Redefine the C++ catch keyword to ensure that applications
+ * propagate our internal exceptions up to the library's internal handlers.
+ */
+#if defined(_MSC_VER)
+        /*
+         * WARNING: Replace any 'catch( ... )' with 'PtW32CatchAll'
+         * if you want Pthread-Win32 cancelation and pthread_exit to work.
+         */
+
+#if !defined(PtW32NoCatchWarn)
+
+#pragma message("Specify \"/DPtW32NoCatchWarn\" compiler flag to skip this message.")
+#pragma message("------------------------------------------------------------------")
+#pragma message("When compiling applications with MSVC++ and C++ exception handling:")
+#pragma message("  Replace any 'catch( ... )' in routines called from POSIX threads")
+#pragma message("  with 'PtW32CatchAll' or 'CATCHALL' if you want POSIX thread")
+#pragma message("  cancelation and pthread_exit to work. For example:")
+#pragma message("")
+#pragma message("    #if defined(PtW32CatchAll)")
+#pragma message("      PtW32CatchAll")
+#pragma message("    #else")
+#pragma message("      catch(...)")
+#pragma message("    #endif")
+#pragma message("        {")
+#pragma message("          /* Catchall block processing */")
+#pragma message("        }")
+#pragma message("------------------------------------------------------------------")
+
+#endif
+
+#define PtW32CatchAll \
+        catch( ptw32_exception & ) { throw; } \
+        catch( ... )
+
+#else /* _MSC_VER */
+
+#define catch( E ) \
+        catch( ptw32_exception & ) { throw; } \
+        catch( E )
+
+#endif /* _MSC_VER */
+
+#endif /* __CLEANUP_CXX */
+
+#endif /* ! PTW32_BUILD */
+
+#if defined(__cplusplus)
+}                               /* End of extern "C" */
+#endif                          /* __cplusplus */
+
+#if defined(PTW32__HANDLE_DEF)
+# undef HANDLE
+#endif
+#if defined(PTW32__DWORD_DEF)
+# undef DWORD
+#endif
+
+#undef PTW32_LEVEL
+#undef PTW32_LEVEL_MAX
+
+#endif /* ! RC_INVOKED */
+
+#endif /* PTHREAD_H */
diff --git a/pthreads/sched.h b/pthreads/sched.h
new file mode 100644
index 0000000..f36a97a
--- /dev/null
+++ b/pthreads/sched.h
@@ -0,0 +1,183 @@
+/*
+ * Module: sched.h
+ *
+ * Purpose:
+ *      Provides an implementation of POSIX realtime extensions
+ *      as defined in 
+ *
+ *              POSIX 1003.1b-1993      (POSIX.1b)
+ *
+ * --------------------------------------------------------------------------
+ *
+ *      Pthreads-win32 - POSIX Threads Library for Win32
+ *      Copyright(C) 1998 John E. Bossom
+ *      Copyright(C) 1999,2005 Pthreads-win32 contributors
+ * 
+ *      Contact Email: rpj@callisto.canberra.edu.au
+ * 
+ *      The current list of contributors is contained
+ *      in the file CONTRIBUTORS included with the source
+ *      code distribution. The list can also be seen at the
+ *      following World Wide Web location:
+ *      http://sources.redhat.com/pthreads-win32/contributors.html
+ * 
+ *      This library is free software; you can redistribute it and/or
+ *      modify it under the terms of the GNU Lesser General Public
+ *      License as published by the Free Software Foundation; either
+ *      version 2 of the License, or (at your option) any later version.
+ * 
+ *      This library is distributed in the hope that it will be useful,
+ *      but WITHOUT ANY WARRANTY; without even the implied warranty of
+ *      MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ *      Lesser General Public License for more details.
+ * 
+ *      You should have received a copy of the GNU Lesser General Public
+ *      License along with this library in the file COPYING.LIB;
+ *      if not, write to the Free Software Foundation, Inc.,
+ *      59 Temple Place - Suite 330, Boston, MA 02111-1307, USA
+ */
+#if !defined(_SCHED_H)
+#define _SCHED_H
+
+#undef PTW32_SCHED_LEVEL
+
+#if defined(_POSIX_SOURCE)
+#define PTW32_SCHED_LEVEL 0
+/* Early POSIX */
+#endif
+
+#if defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 199309
+#undef PTW32_SCHED_LEVEL
+#define PTW32_SCHED_LEVEL 1
+/* Include 1b, 1c and 1d */
+#endif
+
+#if defined(INCLUDE_NP)
+#undef PTW32_SCHED_LEVEL
+#define PTW32_SCHED_LEVEL 2
+/* Include Non-Portable extensions */
+#endif
+
+#define PTW32_SCHED_LEVEL_MAX 3
+
+#if ( defined(_POSIX_C_SOURCE) && _POSIX_C_SOURCE >= 200112 )  || !defined(PTW32_SCHED_LEVEL)
+#define PTW32_SCHED_LEVEL PTW32_SCHED_LEVEL_MAX
+/* Include everything */
+#endif
+
+
+#if defined(__GNUC__) && !defined(__declspec)
+# error Please upgrade your GNU compiler to one that supports __declspec.
+#endif
+
+/*
+ * When building the library, you should define PTW32_BUILD so that
+ * the variables/functions are exported correctly. When using the library,
+ * do NOT define PTW32_BUILD, and then the variables/functions will
+ * be imported correctly.
+ */
+#if !defined(PTW32_STATIC_LIB)
+#  if defined(PTW32_BUILD)
+#    define PTW32_DLLPORT __declspec (dllexport)
+#  else
+#    define PTW32_DLLPORT __declspec (dllimport)
+#  endif
+#else
+#  define PTW32_DLLPORT
+#endif
+
+/*
+ * This is a duplicate of what is in the autoconf config.h,
+ * which is only used when building the pthread-win32 libraries.
+ */
+
+#if !defined(PTW32_CONFIG_H)
+#  if defined(WINCE)
+#    define NEED_ERRNO
+#    define NEED_SEM
+#  endif
+#  if defined(__MINGW64__)
+#    define HAVE_STRUCT_TIMESPEC
+#    define HAVE_MODE_T
+#  elif defined(_UWIN) || defined(__MINGW32__)
+#    define HAVE_MODE_T
+#  endif
+#endif
+
+/*
+ *
+ */
+
+#if PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX
+#if defined(NEED_ERRNO)
+#include "need_errno.h"
+#else
+#include <errno.h>
+#endif
+#endif /* PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX */
+
+#if (defined(__MINGW64__) || defined(__MINGW32__)) || defined(_UWIN)
+# if PTW32_SCHED_LEVEL >= PTW32_SCHED_LEVEL_MAX
+/* For pid_t */
+#  include <sys/types.h>
+/* Required by Unix 98 */
+#  include <time.h>
+# else
+   typedef int pid_t;
+# endif
+#else
+ typedef int pid_t;
+#endif
+
+/* Thread scheduling policies */
+
+enum {
+  SCHED_OTHER = 0,
+  SCHED_FIFO,
+  SCHED_RR,
+  SCHED_MIN   = SCHED_OTHER,
+  SCHED_MAX   = SCHED_RR
+};
+
+struct sched_param {
+  int sched_priority;
+};
+
+#if defined(__cplusplus)
+extern "C"
+{
+#endif                          /* __cplusplus */
+
+PTW32_DLLPORT int __cdecl sched_yield (void);
+
+PTW32_DLLPORT int __cdecl sched_get_priority_min (int policy);
+
+PTW32_DLLPORT int __cdecl sched_get_priority_max (int policy);
+
+PTW32_DLLPORT int __cdecl sched_setscheduler (pid_t pid, int policy);
+
+PTW32_DLLPORT int __cdecl sched_getscheduler (pid_t pid);
+
+/*
+ * Note that this macro returns ENOTSUP rather than
+ * ENOSYS as might be expected. However, returning ENOSYS
+ * should mean that sched_get_priority_{min,max} are
+ * not implemented as well as sched_rr_get_interval.
+ * This is not the case, since we just don't support
+ * round-robin scheduling. Therefore I have chosen to
+ * return the same value as sched_setscheduler when
+ * SCHED_RR is passed to it.
+ */
+#define sched_rr_get_interval(_pid, _interval) \
+  ( errno = ENOTSUP, (int) -1 )
+
+
+#if defined(__cplusplus)
+}                               /* End of extern "C" */
+#endif                          /* __cplusplus */
+
+#undef PTW32_SCHED_LEVEL
+#undef PTW32_SCHED_LEVEL_MAX
+
+#endif                          /* !_SCHED_H */
+