Patch summary (preamble text, ignored by patch tools):
  Renames the scheduler interface from sched_t / SCHED_RETAIN / SCHED_RELEASE to
  gsched_t / GSCHED_RETAIN / GSCHED_RELEASE, protects the ids list of the "none"
  scheduler with its own mutex (mutex_ids), and assigns devices to new ids
  round-robin over the device count reported by cudaGetDeviceCount.
Review amendments folded into this revision of the patch:
  * gsched_none_retain: ret now starts at 0; it was initialised to 1 and never
    cleared, so every call returned 1 before locking mutex_device.
  * gsched_none_retain: an id that is not found in a non-empty list is now
    appended instead of silently reusing the last list entry.
  * gsched_none_release: returns 0 on success (it returned 1 on both paths).
  * gsched_none_device_sched: no modulo by zero when no device count is known.
  * gsched.h declares `sched` extern; cpu-server.c keeps the single definition
    (hunk header there is now -31,8 +31,8).
  The index lines below still carry the blob ids of the original patch.
diff --git a/cpu/cpu-server-driver.c b/cpu/cpu-server-driver.c index cf0ff3d7..f6714b56 100644 --- a/cpu/cpu-server-driver.c +++ b/cpu/cpu-server-driver.c @@ -61,9 +61,9 @@ bool_t rpc_cudrivergetversion_1_svc(int_result *result, struct svc_req *rqstp) bool_t rpc_cudeviceget_1_svc(int ordinal, int_result *result, struct svc_req *rqstp) { LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuDeviceGet(&result->int_result_u.data, ordinal); - SCHED_RELEASE; + GSCHED_RELEASE; return 1; } @@ -71,27 +71,27 @@ bool_t rpc_cudevicegetname_1_svc(int dev, str_result *result, struct svc_req *rq { result->str_result_u.str = malloc(128); LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuDeviceGetName(result->str_result_u.str, 128, dev); - SCHED_RELEASE; + GSCHED_RELEASE; return 1; } bool_t rpc_cudevicetotalmem_1_svc(int dev, u64_result *result, struct svc_req *rqstp) { LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuDeviceTotalMem(&result->u64_result_u.u64, dev); - SCHED_RELEASE; + GSCHED_RELEASE; return 1; } bool_t rpc_cudevicegetattribute_1_svc(int attribute, int dev, int_result *result, struct svc_req *rqstp) { LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuDeviceGetAttribute(&result->int_result_u.data, attribute, dev); - SCHED_RELEASE; + GSCHED_RELEASE; return 1; } @@ -99,9 +99,9 @@ bool_t rpc_cudevicegetuuid_1_svc(int dev, str_result *result, struct svc_req *rq { CUuuid uuid; LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuDeviceGetUuid(&uuid, dev); - SCHED_RELEASE; + GSCHED_RELEASE; if (result->err == 0) { memcpy(result->str_result_u.str, uuid.bytes, 16); } @@ -111,9 +111,9 @@ bool_t rpc_cudevicegetuuid_1_svc(int dev, str_result *result, struct svc_req *rq bool_t rpc_cuctxgetcurrent_1_svc(ptr_result *result, struct svc_req *rqstp) { LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + 
GSCHED_RETAIN; result->err = cuCtxGetCurrent((struct CUctx_st**)&result->ptr_result_u.ptr); - SCHED_RELEASE; + GSCHED_RELEASE; if ((void*)result->ptr_result_u.ptr != NULL) { unsigned int version = 0; cuCtxGetApiVersion((CUcontext)result->ptr_result_u.ptr, &version); @@ -146,11 +146,11 @@ bool_t rpc_cumodulegetfunction_1_svc(uint64_t module, char *name, ptr_result *re RECORD_ARG(1, module); RECORD_ARG(2, name); LOG(LOG_DEBUG, "(fd:%d) %s(%s)", rqstp->rq_xprt->xp_fd, __FUNCTION__, name); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuModuleGetFunction((CUfunction*)&result->ptr_result_u.ptr, resource_mg_get(&rm_streams, (void*)module), name); - SCHED_RELEASE; + GSCHED_RELEASE; if (resource_mg_create(&rm_functions, (void*)result->ptr_result_u.ptr) != 0) { LOGE(LOG_ERROR, "error in resource manager"); } @@ -164,9 +164,9 @@ bool_t rpc_cumoduleload_1_svc(char* path, ptr_result *result, RECORD_API(char*); RECORD_SINGLE_ARG(path); LOG(LOG_DEBUG, "%s(%s)", __FUNCTION__, path); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuModuleLoad((CUmodule*)&result->ptr_result_u.ptr, path); - SCHED_RELEASE; + GSCHED_RELEASE; if (resource_mg_create(&rm_modules, (void*)result->ptr_result_u.ptr) != 0) { LOGE(LOG_ERROR, "error in resource manager"); } @@ -180,9 +180,9 @@ bool_t rpc_cumoduleunload_1_svc(ptr module, int *result, RECORD_API(ptr); RECORD_SINGLE_ARG(module); LOG(LOG_DEBUG, "%s(%p)", __FUNCTION__, (void*)module); - SCHED_RETAIN; + GSCHED_RETAIN; *result = cuModuleUnload(resource_mg_get(&rm_streams, (void*)module)); - SCHED_RELEASE; + GSCHED_RELEASE; RECORD_RESULT(integer, *result); return 1; } @@ -245,18 +245,18 @@ bool_t rpc_cumemalloc_1_svc(uint64_t size, ptr_result *result, struct svc_req *rqstp) { LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuMemAlloc_v2((CUdeviceptr*)&result->ptr_result_u.ptr, (size_t)size); - SCHED_RELEASE; + GSCHED_RELEASE; return 1; } bool_t rpc_cuctxgetdevice_1_svc(int_result *result, struct svc_req *rqstp) { 
LOG(LOG_DEBUG, "%s", __FUNCTION__); - SCHED_RETAIN; + GSCHED_RETAIN; result->err = cuCtxGetDevice((CUdevice*)&result->int_result_u.data); - SCHED_RELEASE; + GSCHED_RELEASE; return 1; } @@ -264,10 +264,10 @@ bool_t rpc_cumemcpyhtod_1_svc(uint64_t dptr, mem_data hptr, int *result, struct svc_req *rqstp) { LOG(LOG_DEBUG, "%s(%p,%p,%d)", __FUNCTION__, dptr, hptr.mem_data_val, hptr.mem_data_len); - SCHED_RETAIN; + GSCHED_RETAIN; *result = cuMemcpyHtoD_v2((CUdeviceptr)dptr, hptr.mem_data_val, hptr.mem_data_len); - SCHED_RELEASE; + GSCHED_RELEASE; return 1; } @@ -305,9 +305,9 @@ bool_t rpc_culaunchkernel_1_svc(uint64_t f, unsigned int gridDimX, unsigned int LOGE(LOG_DEBUG, "cuLaunchKernel(func=%p, gridDim=[%d,%d,%d], blockDim=[%d,%d,%d], args=%p, sharedMem=%d, stream=%p)", f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, cuda_args, sharedMemBytes, (void*)hStream); - SCHED_RETAIN; + GSCHED_RETAIN; *result = cuLaunchKernel((CUfunction)f, gridDimX, gridDimY, gridDimZ, blockDimX, blockDimY, blockDimZ, sharedMemBytes, (CUstream)hStream, cuda_args, NULL); - SCHED_RELEASE; + GSCHED_RELEASE; free(cuda_args); return 1; diff --git a/cpu/cpu-server.c b/cpu/cpu-server.c index fcf48f40..208e52b6 100644 --- a/cpu/cpu-server.c +++ b/cpu/cpu-server.c @@ -31,8 +31,8 @@ int shm_enabled = 1; int ib_device = 0; #endif //WITH_IB -sched_t *sched; -extern sched_t sched_none; +gsched_t *sched; /* the one definition; gsched.h only declares it */ +extern gsched_t sched_none; unsigned long prog=0, vers=0; diff --git a/cpu/gsched.h b/cpu/gsched.h index 9ca4e105..107fcf3a 100644 --- a/cpu/gsched.h +++ b/cpu/gsched.h @@ -1,17 +1,17 @@ -#ifndef _SCHED_H_ -#define _SCHED_H_ +#ifndef _GSCHED_H_ +#define _GSCHED_H_ -typedef struct _sched_t { +typedef struct _gsched_t { int (*init)(void); int (*retain)(int id); int (*release)(int id); int (*rm)(int id); void (*deinit)(void); -} sched_t; +} gsched_t; -sched_t *sched; +extern gsched_t *sched; /* declaration only: avoids one definition per 
including translation unit */ -#define SCHED_RETAIN sched->retain(rqstp->rq_xprt->xp_fd) -#define SCHED_RELEASE sched->release(rqstp->rq_xprt->xp_fd) 
+#define GSCHED_RETAIN sched->retain(rqstp->rq_xprt->xp_fd) +#define GSCHED_RELEASE sched->release(rqstp->rq_xprt->xp_fd) -#endif //_SCHED_H_ +#endif //_GSCHED_H_ diff --git a/cpu/gsched_none.c b/cpu/gsched_none.c index 69176933..49f79b6e 100644 --- a/cpu/gsched_none.c +++ b/cpu/gsched_none.c @@ -5,28 +5,51 @@ #include #include -typedef struct _sched_none_t { +typedef struct _gsched_none_t { int id; int device; -} sched_none_t; +} gsched_none_t; static list ids; static pthread_mutex_t mutex_device; +static pthread_mutex_t mutex_ids; +static int cuda_max_devices; -int sched_none_init(void) +int gsched_none_init(void) { LOG(LOG_DEBUG, "sched_none_init"); - list_init(&ids, sizeof(sched_none_t)); + list_init(&ids, sizeof(gsched_none_t)); pthread_mutex_init(&mutex_device, NULL); + pthread_mutex_init(&mutex_ids, NULL); + if (cudaGetDeviceCount(&cuda_max_devices) != CUDA_SUCCESS) { + return 1; + } return 0; } -int sched_none_retain(int id) +static int gsched_none_device_sched(void) { - sched_none_t *elem = NULL; + static int next_device_id = 0; + int ret = next_device_id; + next_device_id = (cuda_max_devices > 0) ? (next_device_id + 1) % cuda_max_devices : 0; /* round-robin; no modulo by zero when the device count is unknown */ + return ret; +} + +int gsched_none_retain(int id) +{ + gsched_none_t *elem = NULL; + int ret = 0; /* 0 = success; only set to 1 on the failure path before cleanup1 */ + LOG(LOG_DEBUG, "sched_none_retain(%d)", id); + + //TODO: if ids.length == 1 bypass this. 
+ + if (pthread_mutex_lock(&mutex_ids) != 0) { + LOGE(LOG_ERROR, "mutex lock failed"); + return 1; + } + for (size_t i = 0; i < ids.length; ++i) { - elem = (sched_none_t*)list_get(&ids, i); + elem = (gsched_none_t*)list_get(&ids, i); if (id == elem->id) { break; } @@ -34,12 +57,22 @@ int sched_none_retain(int id) - if (elem == NULL) { + if (elem == NULL || elem->id != id) { /* empty list, or the search ended on a different id: register this id */ if (list_append(&ids, (void**)&elem) != 0) { LOGE(LOG_ERROR, "error adding element %d to ids list", id); - return 1; + ret = 1; + goto cleanup1; } elem->id = id; - elem->device = 0; + elem->device = gsched_none_device_sched(); LOGE(LOG_DEBUG, "added %d to ids list", id); } + cleanup1: + if (pthread_mutex_unlock(&mutex_ids) != 0) { + LOGE(LOG_ERROR, "mutex unlock failed"); + return 1; + } + + if (ret != 0) { + return ret; + } if (pthread_mutex_lock(&mutex_device) != 0) { LOGE(LOG_ERROR, "mutex lock failed"); @@ -49,49 +82,69 @@ int sched_none_retain(int id) cudaError_t err; if ((err = cudaSetDevice(elem->device)) != CUDA_SUCCESS) { LOGE(LOG_ERROR, "cudaSetDevice: %s", cudaGetErrorString(err)); + if (pthread_mutex_unlock(&mutex_device) != 0) { + LOGE(LOG_ERROR, "mutex unlock failed"); + } return 1; } return 0; } -int sched_none_release(int id) +int gsched_none_release(int id) { LOG(LOG_DEBUG, "sched_none_release(%d)", id); if (pthread_mutex_unlock(&mutex_device) != 0) { - LOGE(LOG_ERROR, "mutex lock failed"); + LOGE(LOG_ERROR, "mutex unlock failed"); return 1; } - return 1; + return 0; /* success: same 0/1 convention as retain and rm */ } -int sched_none_rm(int id) +int gsched_none_rm(int id) { size_t i; + int ret = 1; LOG(LOG_DEBUG, "sched_none_rm(%d)", id); + + if (pthread_mutex_lock(&mutex_ids) != 0) { + LOGE(LOG_ERROR, "mutex lock failed"); + return 1; + } + for (i = 0; i < ids.length; ++i) { - if (id == ((sched_none_t*)list_get(&ids, i))->id) { + if (id == ((gsched_none_t*)list_get(&ids, i))->id) { break; } } if (i == ids.length) { 
LOGE(LOG_ERROR, "the id %d does not exist in ids list", id); - return 1; + ret = 1; + goto cleanup; } else { - return list_rm(&ids, i); + ret = list_rm(&ids, i); + goto 
cleanup; } + cleanup: + if (pthread_mutex_unlock(&mutex_ids) != 0) { + LOGE(LOG_ERROR, "mutex unlock failed"); + return 1; + } + return ret; + } -void sched_none_deinit(void) +void gsched_none_deinit(void) { LOG(LOG_DEBUG, "sched_none_deinit"); list_free(&ids); pthread_mutex_destroy(&mutex_device); + pthread_mutex_destroy(&mutex_ids); } -sched_t sched_none = { - .init = sched_none_init, - .retain = sched_none_retain, - .release = sched_none_release, - .rm = sched_none_rm, - .deinit = sched_none_deinit +gsched_t sched_none = { + .init = gsched_none_init, + .retain = gsched_none_retain, + .release = gsched_none_release, + .rm = gsched_none_rm, + .deinit = gsched_none_deinit };