diff --git a/3rdparty/stb/include/stb_image_write.h b/3rdparty/stb/include/stb_image_write.h index e4b32ed1bc3..6b6c03ed77c 100644 --- a/3rdparty/stb/include/stb_image_write.h +++ b/3rdparty/stb/include/stb_image_write.h @@ -1260,6 +1260,8 @@ static void stbiw__jpg_writeBits(stbi__write_context *s, int *bitBufP, int *bitC if(c == 255) { stbiw__putc(s, 0); } + // FIXME: https://github.com/nothings/stb/issues/1433 + bitBuf &= 0x0000FFFF; bitBuf <<= 8; bitCnt -= 8; } diff --git a/Makefile b/Makefile index 167d071585b..3d6f658db3a 100644 --- a/Makefile +++ b/Makefile @@ -94,7 +94,14 @@ NVCC=nvcc OPTS=-Ofast LDFLAGS= -lm -pthread COMMON= -Iinclude/ -I3rdparty/stb/include -CFLAGS=-Wall -Wfatal-errors -Wno-unused-result -Wno-unknown-pragmas -fPIC -rdynamic +CFLAGS=-Wall -Wno-unused-parameter -Wno-unknown-pragmas -fPIC -rdynamic + +ifeq ($(USE_CPP), 1) +# C is not C++ +CFLAGS+=-fpermissive -Wno-write-strings +else +CFLAGS+=-Wextra +endif ifeq ($(DEBUG), 1) #OPTS= -O0 -g diff --git a/include/darknet.h b/include/darknet.h index 55ab50d5da8..053422b46fa 100644 --- a/include/darknet.h +++ b/include/darknet.h @@ -553,7 +553,7 @@ struct layer { tree *softmax_tree; - size_t workspace_size; + ssize_t workspace_size; //#ifdef GPU int *indexes_gpu; @@ -704,7 +704,7 @@ typedef enum { typedef struct network { int n; int batch; - uint64_t *seen; + int64_t *seen; float *badlabels_reject_threshold; float *delta_rolling_max; float *delta_rolling_avg; @@ -818,8 +818,8 @@ typedef struct network { float **truth_gpu; float **input16_gpu; float **output16_gpu; - size_t *max_input16_size; - size_t *max_output16_size; + ssize_t *max_input16_size; + ssize_t *max_output16_size; int wait_stream; void *cuda_graph; @@ -829,11 +829,11 @@ typedef struct network { float *global_delta_gpu; float *state_delta_gpu; - size_t max_delta_gpu_size; + ssize_t max_delta_gpu_size; //#endif // GPU int optimized_memory; int dynamic_minibatch; - size_t workspace_size_limit; + ssize_t workspace_size_limit; } network; // network.h diff --git a/include/yolo_v2_class.hpp b/include/yolo_v2_class.hpp index 1d70a2c2e9b..88e8b26994f 100644 --- a/include/yolo_v2_class.hpp +++ b/include/yolo_v2_class.hpp @@ -515,7 +515,7 @@ class Tracker_optflow { if (err.rows == cur_bbox_vec.size() && status.rows == cur_bbox_vec.size()) { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + for (int i = 0; i < cur_bbox_vec.size(); ++i) { cv::Point2f cur_key_pt = cur_pts_flow.at(0, i); cv::Point2f prev_key_pt = prev_pts_flow.at(0, i); @@ -573,7 +573,7 @@ class preview_boxes_t { preview_box_track_t() : track_id(0), obj_id(0), last_showed_frames_ago(frames_history), current_detection(false) {} }; std::vector preview_box_track_id; - size_t const preview_box_size, bottom_offset; + int const preview_box_size, bottom_offset; bool const one_off_detections; public: preview_boxes_t(size_t _preview_box_size = 100, size_t _bottom_offset = 100, bool _one_off_detections = false) : @@ -876,8 +876,8 @@ class track_kalman_t float time_wait = 0.5; // 0.5 second if (track_id_state_id_time[state_id].track_id > -1) { - if ((result_vec_pred[state_id].x > img_size.width) || - (result_vec_pred[state_id].y > img_size.height)) + if (((int)result_vec_pred[state_id].x > img_size.width) || + ((int)result_vec_pred[state_id].y > img_size.height)) { track_id_state_id_time[state_id].track_id = -1; } @@ -897,7 +897,7 @@ class track_kalman_t float min_dist = std::numeric_limits::max(); - for (size_t i = 0; i < max_objects; ++i) + for (int i = 0; i < max_objects; ++i) { if 
(track_id_state_id_time[i].track_id > -1 && result_vec_pred[i].obj_id == find_box.obj_id && busy_vec[i] == false) { @@ -987,7 +987,7 @@ class track_kalman_t clear_old_states(); std::vector result_vec; - for (size_t i = 0; i < max_objects; ++i) + for (int i = 0; i < max_objects; ++i) { tst_t tst = track_id_state_id_time[i]; if (tst.track_id > -1) { @@ -1022,7 +1022,7 @@ class track_kalman_t calc_dt(); clear_old_states(); - for (size_t i = 0; i < max_objects; ++i) + for (int i = 0; i < max_objects; ++i) track_id_state_id_time[i].detection_count--; std::vector tst_vec = find_state_ids(result_vec); diff --git a/src/activations.c b/src/activations.c index 9e8a49f72f7..07ac9a2d189 100644 --- a/src/activations.c +++ b/src/activations.c @@ -9,6 +9,15 @@ char *get_activation_string(ACTIVATION a) { switch(a){ + case RELU6: + case SWISH: + case HARD_MISH: + case NORM_CHAN: + case NORM_CHAN_SOFTMAX: + case NORM_CHAN_SOFTMAX_MAXVAL: + case MISH: + assert(0); + case LOGISTIC: return "logistic"; case LOGGY: @@ -77,6 +86,14 @@ ACTIVATION get_activation(char *s) float activate(float x, ACTIVATION a) { switch(a){ + case RELU6: + case SWISH: + case HARD_MISH: + case NORM_CHAN: + case NORM_CHAN_SOFTMAX: + case NORM_CHAN_SOFTMAX_MAXVAL: + case MISH: + assert(0); case LINEAR: return linear_activate(x); case LOGISTIC: @@ -308,6 +325,10 @@ void gradient_array_normalize_channels(float *x, const int n, int batch, int cha float gradient(float x, ACTIVATION a) { switch(a){ + case SWISH: + case HARD_MISH: + case MISH: + assert(0); case LINEAR: return linear_gradient(x); case LOGISTIC: @@ -321,9 +342,10 @@ float gradient(float x, ACTIVATION a) case NORM_CHAN: //return relu_gradient(x); case NORM_CHAN_SOFTMAX_MAXVAL: - //... + // fallthrough case NORM_CHAN_SOFTMAX: error("Error: should be used custom NORM_CHAN or NORM_CHAN_SOFTMAX-function for gradient", DARKNET_LOC); + break; case ELU: return elu_gradient(x); case SELU: diff --git a/src/blas.c b/src/blas.c index 122bca0ceaa..7d35360726e 100644 --- a/src/blas.c +++ b/src/blas.c @@ -204,7 +204,7 @@ void backward_shortcut_multilayer_cpu(int size, int src_outputs, int batch, int int add_outputs = outputs_of_layers[i]; if (src_i < add_outputs) { int add_index = add_outputs*src_b + src_i; - int out_index = id; + // int out_index = id; float *layer_delta = layers_delta[i]; if (weights) { @@ -506,7 +506,7 @@ void constrain_cpu(int size, float ALPHA, float *X) } } -void fix_nan_and_inf_cpu(float *input, size_t size) +void fix_nan_and_inf_cpu(float *input, int size) { int i; for (i = 0; i < size; ++i) { @@ -530,7 +530,7 @@ void get_embedding(float *src, int src_w, int src_h, int src_c, int embedding_si // Euclidean_norm -float math_vector_length(float *A, unsigned int feature_size) +float math_vector_length(float *A, int feature_size) { float sum = 0; int i; @@ -542,7 +542,7 @@ float math_vector_length(float *A, unsigned int feature_size) return vector_length; } -float cosine_similarity(float *A, float *B, unsigned int feature_size) +float cosine_similarity(float *A, float *B, int feature_size) { float mul = 0.0, d_a = 0.0, d_b = 0.0; @@ -561,9 +561,9 @@ float cosine_similarity(float *A, float *B, unsigned int feature_size) return similarity; } -int get_sim_P_index(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size) +int get_sim_P_index(int i, int j, contrastive_params *contrast_p, int contrast_p_size) { - size_t z; + int z; for (z = 0; z < contrast_p_size; ++z) { if (contrast_p[z].i == i && contrast_p[z].j == j) break; } @@ -574,9 +574,9 @@ int 
get_sim_P_index(size_t i, size_t j, contrastive_params *contrast_p, int cont return z; // found } -int check_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size) +int check_sim(int i, int j, contrastive_params *contrast_p, int contrast_p_size) { - size_t z; + int z; for (z = 0; z < contrast_p_size; ++z) { if (contrast_p[z].i == i && contrast_p[z].j == j) break; } @@ -587,28 +587,28 @@ int check_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p return 1; // found } -float find_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size) +float find_sim(int i, int j, contrastive_params *contrast_p, int contrast_p_size) { - size_t z; + int z; for (z = 0; z < contrast_p_size; ++z) { if (contrast_p[z].i == i && contrast_p[z].j == j) break; } if (z == contrast_p_size) { - printf(" Error: find_sim(): sim isn't found: i = %zu, j = %zu, z = %zu \n", i, j, z); + printf(" Error: find_sim(): sim isn't found: i = %d, j = %d, z = %d \n", i, j, z); error("Error!", DARKNET_LOC); } return contrast_p[z].sim; } -float find_P_constrastive(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size) +float find_P_constrastive(int i, int j, contrastive_params *contrast_p, int contrast_p_size) { - size_t z; + int z; for (z = 0; z < contrast_p_size; ++z) { if (contrast_p[z].i == i && contrast_p[z].j == j) break; } if (z == contrast_p_size) { - printf(" Error: find_P_constrastive(): P isn't found: i = %zu, j = %zu, z = %zu \n", i, j, z); + printf(" Error: find_P_constrastive(): P isn't found: i = %d, j = %d, z = %d \n", i, j, z); error("Error!", DARKNET_LOC); } @@ -616,11 +616,11 @@ float find_P_constrastive(size_t i, size_t j, contrastive_params *contrast_p, in } // num_of_samples = 2 * loaded_images = mini_batch_size -float P_constrastive_f_det(size_t il, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size) +float P_constrastive_f_det(int il, int *labels, float **z, int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size) { const float sim = contrast_p[il].sim; - const size_t i = contrast_p[il].i; - const size_t j = contrast_p[il].j; + const int i = contrast_p[il].i; + const int j = contrast_p[il].j; const float numerator = expf(sim / temperature); @@ -645,10 +645,10 @@ float P_constrastive_f_det(size_t il, int *labels, float **z, unsigned int featu } // num_of_samples = 2 * loaded_images = mini_batch_size -float P_constrastive_f(size_t i, size_t l, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size) +float P_constrastive_f(int i, int l, int *labels, float **z, int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size) { if (i == l) { - fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %zu, l = %zu \n", i, l); + fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, l = %d \n", i, l); error("Error!", DARKNET_LOC); } @@ -675,10 +675,10 @@ float P_constrastive_f(size_t i, size_t l, int *labels, float **z, unsigned int return result; } -void grad_contrastive_loss_positive_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size) +void grad_contrastive_loss_positive_f(int i, int *class_ids, int *labels, int num_of_samples, float **z, int feature_size, float temperature, 
float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size) { const float vec_len = math_vector_length(z[i], feature_size); - size_t j; + int j; float N = 0; for (j = 0; j < num_of_samples; ++j) { if (labels[i] == labels[j] && labels[i] >= 0) N++; @@ -720,10 +720,10 @@ void grad_contrastive_loss_positive_f(size_t i, int *class_ids, int *labels, siz } } -void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size, int neg_max) +void grad_contrastive_loss_negative_f(int i, int *class_ids, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size, int neg_max) { const float vec_len = math_vector_length(z[i], feature_size); - size_t j; + int j; float N = 0; for (j = 0; j < num_of_samples; ++j) { if (labels[i] == labels[j] && labels[i] >= 0) N++; @@ -741,7 +741,7 @@ void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, siz //if (i != j && (i/2) == (j/2)) { if (labels[i] >= 0 && labels[i] == labels[j] && i != j) { - size_t k; + int k; for (k = 0; k < num_of_samples; ++k) { //if (k != i && k != j && labels[k] != labels[i]) { if (k != i && k != j && labels[k] != labels[i] && class_ids[j] == class_ids[k]) { @@ -777,10 +777,10 @@ void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, siz // num_of_samples = 2 * loaded_images = mini_batch_size -float P_constrastive(size_t i, size_t l, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *exp_cos_sim) +float P_constrastive(int i, int l, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *cos_sim, float *exp_cos_sim) { if (i == l) { - fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %zu, l = %zu \n", i, l); + fprintf(stderr, " Error: in P_constrastive must be i != l, while i = %d, l = %d \n", i, l); error("Error!", DARKNET_LOC); } @@ -808,10 +808,10 @@ float P_constrastive(size_t i, size_t l, int *labels, size_t num_of_samples, flo // z[feature_size][num_of_samples] - array of arrays with contrastive features (output of conv-layer, f.e. 128 floats for each sample) // delta[feature_size] - array with deltas for backpropagation // temperature - scalar temperature param (temperature > 0), f.e. temperature = 0.07: Supervised Contrastive Learning -void grad_contrastive_loss_positive(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh) +void grad_contrastive_loss_positive(int i, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh) { const float vec_len = math_vector_length(z[i], feature_size); - size_t j; + int j; float N = 0; for (j = 0; j < num_of_samples; ++j) { if (labels[i] == labels[j]) N++; @@ -848,10 +848,10 @@ void grad_contrastive_loss_positive(size_t i, int *labels, size_t num_of_samples // z[feature_size][num_of_samples] - array of arrays with contrastive features (output of conv-layer, f.e. 128 floats for each sample) // delta[feature_size] - array with deltas for backpropagation // temperature - scalar temperature param (temperature > 0), f.e. 
temperature = 0.07: Supervised Contrastive Learning -void grad_contrastive_loss_negative(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh) +void grad_contrastive_loss_negative(int i, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh) { const float vec_len = math_vector_length(z[i], feature_size); - size_t j; + int j; float N = 0; for (j = 0; j < num_of_samples; ++j) { if (labels[i] == labels[j]) N++; @@ -866,7 +866,7 @@ void grad_contrastive_loss_negative(size_t i, int *labels, size_t num_of_samples //if (i != j && (i/2) == (j/2)) { if (i != j && labels[i] == labels[j]) { - size_t k; + int k; for (k = 0; k < num_of_samples; ++k) { //if (k != i && k != j && labels[k] != labels[i]) { if (k != i && k != j && labels[k] >= 0) { diff --git a/src/blas.h b/src/blas.h index b69a702fa93..7d9f9be4614 100644 --- a/src/blas.h +++ b/src/blas.h @@ -57,23 +57,23 @@ void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int for void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output); void softmax_x_ent_cpu(int n, float *pred, float *truth, float *delta, float *error); void constrain_cpu(int size, float ALPHA, float *X); -void fix_nan_and_inf_cpu(float *input, size_t size); +void fix_nan_and_inf_cpu(float *input, int size); -int check_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size); -float find_sim(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size); -float find_P_constrastive(size_t i, size_t j, contrastive_params *contrast_p, int contrast_p_size); -float P_constrastive_f_det(size_t il, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size); -float P_constrastive_f(size_t i, size_t l, int *labels, float **z, unsigned int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size); -void grad_contrastive_loss_positive_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size); -void grad_contrastive_loss_negative_f(size_t i, int *class_ids, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size, int neg_max); +int check_sim(int i, int j, contrastive_params *contrast_p, int contrast_p_size); +float find_sim(int i, int j, contrastive_params *contrast_p, int contrast_p_size); +float find_P_constrastive(int i, int j, contrastive_params *contrast_p, int contrast_p_size); +float P_constrastive_f_det(int il, int *labels, float **z, int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size); +float P_constrastive_f(int i, int l, int *labels, float **z, int feature_size, float temperature, contrastive_params *contrast_p, int contrast_p_size); +void grad_contrastive_loss_positive_f(int i, int *class_ids, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *delta, int wh, contrastive_params *contrast_p, int contrast_p_size); +void grad_contrastive_loss_negative_f(int i, int *class_ids, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float 
*delta, int wh, contrastive_params *contrast_p, int contrast_p_size, int neg_max); void get_embedding(float *src, int src_w, int src_h, int src_c, int embedding_size, int cur_w, int cur_h, int cur_n, int cur_b, float *dst); -float math_vector_length(float *A, unsigned int feature_size); -float cosine_similarity(float *A, float *B, unsigned int feature_size); -float P_constrastive(size_t i, size_t l, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *exp_cos_sim); -void grad_contrastive_loss_positive(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh); -void grad_contrastive_loss_negative(size_t i, int *labels, size_t num_of_samples, float **z, unsigned int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh); +float math_vector_length(float *A, int feature_size); +float cosine_similarity(float *A, float *B, int feature_size); +float P_constrastive(int i, int l, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *cos_sim, float *exp_cos_sim); +void grad_contrastive_loss_positive(int i, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh); +void grad_contrastive_loss_negative(int i, int *labels, int num_of_samples, float **z, int feature_size, float temperature, float *cos_sim, float *p_constrastive, float *delta, int wh); #ifdef GPU @@ -171,7 +171,7 @@ void rotate_weights_gpu(const float *src_weight_gpu, float *weight_deform_gpu, i void reduce_and_expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups); void expand_array_gpu(const float *src_gpu, float *dst_gpu, int size, int groups); void mult_inverse_array_gpu(const float *src_gpu, float *dst_gpu, int size, float eps, float divider, float clip, float abs_add); -void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size); +void P_constrastive_f_det_gpu(int *labels, int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size); void coord_conv_gpu(float *dst, int size, int w, int h, int chan, int b, int type); void forward_implicit_gpu(int batch, int nweights, float *weight_gpu, float *output_gpu); diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu index 3bc0d90b553..9794765ee75 100644 --- a/src/blas_kernels.cu +++ b/src/blas_kernels.cu @@ -2378,7 +2378,7 @@ __global__ void P_constrastive_f_det_kernel(int *labels, unsigned int feature_si } -extern "C" void P_constrastive_f_det_gpu(int *labels, unsigned int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size) +extern "C" void P_constrastive_f_det_gpu(int *labels, int feature_size, float temperature, contrastive_params *contrast_p, const int contrast_p_size) { const int block_size = BLOCK; const int num_blocks = get_number_of_blocks(contrast_p_size, block_size); diff --git a/src/box.c b/src/box.c index 0ad1263e56f..a0a53586f2c 100644 --- a/src/box.c +++ b/src/box.c @@ -152,6 +152,7 @@ float box_iou_kind(box a, box b, IOU_LOSS iou_kind) { //IOU, GIOU, MSE, DIOU, CIOU switch(iou_kind) { + case MSE: assert(0); case IOU: return box_iou(a, b); case GIOU: return box_giou(a, b); case DIOU: return box_diou(a, b); @@ -893,9 +894,9 @@ void diounms_sort(detection *dets, int total, int 
classes, float thresh, NMS_KIN box b = dets[j].bbox; if (box_iou(a, b) > thresh && nms_kind == CORNERS_NMS) { - float sum_prob = pow(dets[i].prob[k], 2) + pow(dets[j].prob[k], 2); - float alpha_prob = pow(dets[i].prob[k], 2) / sum_prob; - float beta_prob = pow(dets[j].prob[k], 2) / sum_prob; + //float sum_prob = pow(dets[i].prob[k], 2) + pow(dets[j].prob[k], 2); + // float alpha_prob = pow(dets[i].prob[k], 2) / sum_prob; + //float beta_prob = pow(dets[j].prob[k], 2) / sum_prob; //dets[i].bbox.x = (dets[i].bbox.x*alpha_prob + dets[j].bbox.x*beta_prob); //dets[i].bbox.y = (dets[i].bbox.y*alpha_prob + dets[j].bbox.y*beta_prob); //dets[i].bbox.w = (dets[i].bbox.w*alpha_prob + dets[j].bbox.w*beta_prob); diff --git a/src/captcha.c b/src/captcha.c index 5fd565d0442..9c97ddb2f30 100644 --- a/src/captcha.c +++ b/src/captcha.c @@ -105,16 +105,16 @@ void test_captcha(char *cfgfile, char *weightfile, char *filename) srand(2222222); int i = 0; char** names = get_labels("data/captcha/reimgs.labels.list"); - char buff[256]; + char buff[256] = { 0 }; char *input = buff; int indexes[26]; while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); }else{ //printf("Enter Image Path: "); //fflush(stdout); - input = fgets(input, 256, stdin); + input = fgets(input, sizeof buff - 1, stdin); if(!input) return; strtok(input, "\n"); } diff --git a/src/classifier.c b/src/classifier.c index f01303327cf..4b74816bd93 100644 --- a/src/classifier.c +++ b/src/classifier.c @@ -32,7 +32,9 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int i; float avg_loss = -1; +#ifdef OPENCV float avg_contrastive_acc = 0; +#endif char *base = basecfg(cfgfile); printf("%s\n", base); printf("%d\n", ngpus); @@ -143,7 +145,7 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, int iter_topk = get_current_batch(net); float topk = 0; - int count = 0; + //int count = 0; double start, time_remaining, avg_time = -1, alpha_time = 0.01; start = what_time_is_it_now(); @@ -186,8 +188,9 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, else fprintf(stderr, " Tensor Cores are used.\n"); } } - +#ifdef OPENCV int draw_precision = 0; +#endif if (calc_topk && (i >= calc_topk_for_each || i == net.max_batches)) { iter_topk = i; if (net.contrastive && l.type != SOFTMAX && l.type != COST) { @@ -200,7 +203,9 @@ void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int *gpus, topk = validate_classifier_single(datacfg, cfgfile, weightfile, &net, topk_data); // calc TOP-n printf("\n accuracy %s = %f \n", topk_buff, topk); } +#ifdef OPENCV draw_precision = 1; +#endif } time_remaining = ((net.max_batches - i) / ngpus) * (what_time_is_it_now() - start) / 60 / 60; @@ -759,11 +764,11 @@ void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filena char **names = get_labels(name_list); clock_t time; int* indexes = (int*)xcalloc(top, sizeof(int)); - char buff[256]; + char buff[256] = { 0 }; char *input = buff; while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, 255); }else{ printf("Enter Image Path: "); fflush(stdout); @@ -852,14 +857,14 @@ void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *fi int i = 0; char **names = get_labels(name_list); - clock_t time; + // clock_t time; int* indexes = (int*)xcalloc(top, sizeof(int)); - char buff[256]; + char buff[256] = { 0 }; char *input = buff; //int size = net.w; while(1){ if(filename){ - 
strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); }else{ printf("Enter Image Path: "); fflush(stdout); @@ -1280,7 +1285,7 @@ void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_ind int frame_counter = 0; while(1){ - struct timeval tval_before, tval_after, tval_result; + struct timeval tval_before /*, tval_after, tval_result */; gettimeofday(&tval_before, NULL); //image in = get_image_from_stream(cap); diff --git a/src/coco.c b/src/coco.c index 8ad13834b01..cb84ab9e588 100644 --- a/src/coco.c +++ b/src/coco.c @@ -87,19 +87,19 @@ void train_coco(char *cfgfile, char *weightfile) printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); if(i%1000==0 || (i < 1000 && i%100 == 0)){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + char buff[256] = { 0 }; + snprintf(buff, sizeof buff, "%s/%s_%d.weights", backup_directory, base, i); save_weights(net, buff); } if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); + char buff[256] = { 0 }; + snprintf(buff, sizeof buff, "%s/%s.backup", backup_directory, base); save_weights(net, buff); } free_data(train); } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); + char buff[256] = { 0 }; + snprintf(buff, sizeof buff, "%s/%s_final.weights", backup_directory, base); save_weights(net, buff); } @@ -245,7 +245,7 @@ void validate_coco_recall(char *cfgfile, char *weightfile) fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); srand(time(0)); - char *base = "results/comp4_det_test_"; + //char *base = "results/comp4_det_test_"; list* plist = get_paths("data/voc/test/2007_test.txt"); char **paths = (char **)list_to_array(plist); @@ -343,7 +343,7 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) srand(2222222); float nms = .4; clock_t time; - char buff[256]; + char buff[256] = { 0 }; char *input = buff; int j; box* boxes = (box*)xcalloc(l.side * l.side * l.n, sizeof(box)); @@ -353,11 +353,11 @@ void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh) } while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); } else { printf("Enter Image Path: "); fflush(stdout); - input = fgets(input, 256, stdin); + input = fgets(input, sizeof buff - 1, stdin); if(!input) break; strtok(input, "\n"); } diff --git a/src/connected_layer.c b/src/connected_layer.c index 244e82fbb86..09b0a25eac3 100644 --- a/src/connected_layer.c +++ b/src/connected_layer.c @@ -336,14 +336,8 @@ void forward_connected_layer_gpu(connected_layer l, network_state state) { fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1); - int m = l.batch; - int k = l.inputs; - int n = l.outputs; - float * a = state.input; - float * b = l.weights_gpu; - float * c = l.output_gpu; #ifdef CUDNN - float one = 1; // alpha[0], beta[0] + //float one = 1; // alpha[0], beta[0] float alpha = 1, beta = 0; CHECK_CUDNN(cudnnConvolutionForward(cudnn_handle(), @@ -360,6 +354,12 @@ void forward_connected_layer_gpu(connected_layer l, network_state state) l.dstTensorDesc, l.output_gpu)); #else // CUDNN + int m = l.batch; + int k = l.inputs; + int n = l.outputs; + float * a = state.input; + float * b = l.weights_gpu; + float * c = l.output_gpu; gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n); #endif // CUDNN diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu index 
debd6bf159e..3a097459d20 100644 --- a/src/convolutional_kernels.cu +++ b/src/convolutional_kernels.cu @@ -440,8 +440,8 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) // 2. or CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED // More: http://docs.nvidia.com/deeplearning/sdk/cudnn-developer-guide/index.html#tensor_ops - const size_t input16_size = l.batch*l.c*l.w*l.h; - const size_t output16_size = l.batch*l.out_c*l.out_h*l.out_w; + const ssize_t input16_size = l.batch*l.c*l.w*l.h; + const ssize_t output16_size = l.batch*l.out_c*l.out_h*l.out_w; if (*state.net.max_input16_size < input16_size) { //printf("\n input16_size: cur = %zu \t max = %zu \n", input16_size, *state.net.max_input16_size); @@ -627,7 +627,7 @@ void forward_convolutional_layer_gpu(convolutional_layer l, network_state state) if(l.assisted_excitation && state.train) assisted_excitation_forward_gpu(l, state); if (l.antialiasing) { - network_state s = { 0 }; + network_state s = { }; s.train = state.train; s.workspace = state.workspace; s.net = state.net; @@ -650,7 +650,7 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state } if (l.antialiasing) { - network_state s = { 0 }; + network_state s = { }; s.train = state.train; s.workspace = state.workspace; s.net = state.net; @@ -694,8 +694,8 @@ void backward_convolutional_layer_gpu(convolutional_layer l, network_state state if (state.index != 0 && state.net.cudnn_half && !l.xnor && (!state.train || (iteration_num > 3 * state.net.burn_in) && state.net.loss_scale != 1) && (l.c / l.groups) % 8 == 0 && l.n % 8 == 0 && l.groups <= 1 && l.size > 1) { - const size_t input16_size = l.batch*l.c*l.w*l.h; - const size_t delta16_size = l.batch*l.n*l.out_w*l.out_h; + const ssize_t input16_size = l.batch*l.c*l.w*l.h; + const ssize_t delta16_size = l.batch*l.n*l.out_w*l.out_h; if (*state.net.max_input16_size < input16_size) { *state.net.max_input16_size = input16_size; diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c index 18f9e8b7f10..4c284842aac 100644 --- a/src/convolutional_layer.c +++ b/src/convolutional_layer.c @@ -887,8 +887,10 @@ void test_convolutional_layer() void resize_convolutional_layer(convolutional_layer *l, int w, int h) { int total_batch = l->batch*l->steps; +#ifdef GPU int old_w = l->w; int old_h = l->h; +#endif l->w = w; l->h = h; int out_w = convolutional_out_width(*l); @@ -967,7 +969,7 @@ void resize_convolutional_layer(convolutional_layer *l, int w, int h) size_t free_byte; size_t total_byte; CHECK_CUDA(cudaMemGetInfo(&free_byte, &total_byte)); - if (l->workspace_size > free_byte || l->workspace_size >= total_byte / 2) { + if (l->workspace_size > (ssize_t)free_byte || l->workspace_size >= (ssize_t)total_byte / 2) { printf(" used slow CUDNN algo without Workspace! Need memory: %zu, available: %zu\n", l->workspace_size, (free_byte < total_byte/2) ? 
free_byte : total_byte/2); cudnn_convolutional_setup(l, cudnn_smallest, 0); l->workspace_size = get_convolutional_workspace_size(*l); @@ -1061,9 +1063,9 @@ void float_to_bit(float *src, unsigned char *dst, size_t size) { void bit_to_float(unsigned char *src, float *dst, size_t size, size_t filters, float *mean_arr) { memset(dst, 0, size *sizeof(float)); - size_t i; + int i; - for (i = 0; i < size; ++i) { + for (i = 0; i < (int)size; ++i) { float mean_val = 1; if(mean_arr != NULL) mean_val = fabs(mean_arr[i / (size / filters)]); if(get_bit(src, i)) dst[i] = mean_val; @@ -1075,17 +1077,17 @@ void binary_align_weights(convolutional_layer *l) { int m = l->n; // (l->n / l->groups) int k = l->size*l->size*l->c; // ->size*l->size*(l->c / l->groups) - size_t new_lda = k + (l->lda_align - k % l->lda_align); // (k / 8 + 1) * 8; + int new_lda = k + (l->lda_align - k % l->lda_align); // (k / 8 + 1) * 8; l->new_lda = new_lda; binarize_weights(l->weights, m, k, l->binary_weights); - size_t align_weights_size = new_lda * m; + int align_weights_size = new_lda * m; l->align_bit_weights_size = align_weights_size / 8 + 1; - float* align_weights = (float*)xcalloc(align_weights_size, sizeof(float)); - l->align_bit_weights = (char*)xcalloc(l->align_bit_weights_size, sizeof(char)); + float* align_weights = xcalloc(align_weights_size, sizeof(float)); + l->align_bit_weights = xcalloc(l->align_bit_weights_size, sizeof(char)); - size_t i, j; + int i, j; // align A without transpose for (i = 0; i < m; ++i) { for (j = 0; j < k; ++j) { @@ -1245,7 +1247,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) //printf(" l.index = %d - new XNOR \n", l.index); int ldb_align = l.lda_align; - size_t new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; + int new_ldb = k + (ldb_align - k%ldb_align); // (k / 8 + 1) * 8; //size_t t_intput_size = new_ldb * l.bit_align;// n; //size_t t_bit_input_size = t_intput_size / 8;// +1; @@ -1332,7 +1334,7 @@ void forward_convolutional_layer(convolutional_layer l, network_state state) //size_t ldb_align = 256; // 256 bit for AVX2 int ldb_align = l.lda_align; size_t new_ldb = k + (ldb_align - k%ldb_align); - size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align); + // size_t t_intput_size = binary_transpose_align_input(k, n, state.workspace, &l.t_bit_input, ldb_align, l.bit_align); // 5x times faster than gemm()-float32 gemm_nn_custom_bin_mean_transposed(m, n, k, 1, (unsigned char*)l.align_bit_weights, new_ldb, (unsigned char*)l.t_bit_input, new_ldb, c, n, l.mean_arr); diff --git a/src/dark_cuda.c b/src/dark_cuda.c index 74f067724db..93166f9ec9b 100644 --- a/src/dark_cuda.c +++ b/src/dark_cuda.c @@ -215,7 +215,7 @@ void cudnn_check_error_extended(cudnnStatus_t status, const char * const filenam #endif if (cuda_debug_sync) { cudaError_t status = cudaDeviceSynchronize(); - if (status != CUDNN_STATUS_SUCCESS) + if (status != (enum cudaError)CUDNN_STATUS_SUCCESS) printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s function: %s() line: %d\n", filename, function, line); } cudnn_check_error(status, filename, function, line); @@ -249,7 +249,7 @@ void cublas_check_error_extended(cublasStatus_t status, const char * const filen #endif if (cuda_debug_sync) { cudaError_t status = cudaDeviceSynchronize(); - if (status != CUDA_SUCCESS) + if (status != (enum cudaError)CUDA_SUCCESS) printf("\n cudaError_t status = cudaDeviceSynchronize() Error in: file: %s function: %s() line: %d\n", filename, 
function, line); } cublas_check_error(status); @@ -271,8 +271,8 @@ cublasHandle_t blas_handle() } -static int switchBlasInit[16] = { 0 }; -static cublasHandle_t switchBlasHandle[16]; +// static int switchBlasInit[16] = { 0 }; +// static cublasHandle_t switchBlasHandle[16]; static cudaStream_t switchStreamsArray[16]; static int switchStreamInit[16] = { 0 }; @@ -350,10 +350,10 @@ void reset_wait_stream_events() { static float **pinned_ptr = NULL; -static size_t pinned_num_of_blocks = 0; -static size_t pinned_index = 0; -static size_t pinned_block_id = 0; -static const size_t pinned_block_size = (size_t)1024 * 1024 * 1024 * 1; // 1 GB block size +static int pinned_num_of_blocks = 0; +static int pinned_index = 0; +static int pinned_block_id = 0; +static const size_t pinned_block_size = 1024 * 1024 * 1024 * 1; // 1 GB block size static pthread_mutex_t mutex_pinned = PTHREAD_MUTEX_INITIALIZER; // free CPU-pinned memory @@ -372,7 +372,7 @@ void free_pinned_memory() // custom CPU-pinned memory allocation void pre_allocate_pinned_memory(const size_t size) { - const size_t num_of_blocks = size / pinned_block_size + ((size % pinned_block_size) ? 1 : 0); + const int num_of_blocks = size / pinned_block_size + ((size % pinned_block_size) ? 1 : 0); printf("pre_allocate... pinned_ptr = %p \n", (void *)pinned_ptr); pthread_mutex_lock(&mutex_pinned); @@ -380,7 +380,7 @@ void pre_allocate_pinned_memory(const size_t size) pinned_ptr = (float **)calloc(num_of_blocks, sizeof(float *)); if(!pinned_ptr) error("calloc failed in pre_allocate()", DARKNET_LOC); - printf("pre_allocate: size = %zu MB, num_of_blocks = %zu, block_size = %zu MB \n", + printf("pre_allocate: size = %zu MB, num_of_blocks = %d, block_size = %zu MB \n", size / (1024*1024), num_of_blocks, pinned_block_size / (1024 * 1024)); int k; @@ -411,7 +411,7 @@ float *cuda_make_array_pinned_preallocated(float *x, size_t n) { if ((allocation_size + pinned_index) > pinned_block_size) { const float filled = (float)100 * pinned_index / pinned_block_size; - printf("\n Pinned block_id = %zu, filled = %f %% \n", pinned_block_id, filled); + printf("\n Pinned block_id = %d, filled = %f %% \n", pinned_block_id, filled); pinned_block_id++; pinned_index = 0; } diff --git a/src/data.c b/src/data.c index 70e1b09b2f4..fa05e61abb1 100644 --- a/src/data.c +++ b/src/data.c @@ -401,7 +401,7 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float char buff[256]; if (id >= classes) { printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d], file: %s \n", id, (classes-1), labelpath); - sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, (classes-1)); + snprintf(buff, sizeof buff, "echo %s \"Wrong annotation: class_id = %d. 
But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, (classes-1)); system(buff); ++sub; continue; @@ -414,27 +414,29 @@ int fill_truth_detection(const char *path, int num_boxes, int truth_size, float } if (x == 999999 || y == 999999) { printf("\n Wrong annotation: x = 0, y = 0, < 0 or > 1, file: %s \n", labelpath); - sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); + snprintf(buff, sizeof buff,"echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); system(buff); ++sub; continue; } if (x <= 0 || x > 1 || y <= 0 || y > 1) { printf("\n Wrong annotation: x = %f, y = %f, file: %s \n", x, y, labelpath); - sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); + snprintf(buff, sizeof buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); system(buff); ++sub; continue; } if (w > 1) { printf("\n Wrong annotation: w = %f, file: %s \n", w, labelpath); - sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); + if (strlen(labelpath) > 251) + error("path too long", DARKNET_LOC); + snprintf(buff, sizeof buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); system(buff); w = 1; } if (h > 1) { printf("\n Wrong annotation: h = %f, file: %s \n", h, labelpath); - sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); + snprintf(buff, sizeof buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); system(buff); h = 1; } @@ -474,7 +476,7 @@ void fill_truth_captcha(char *path, int n, float *truth) char *begin = strrchr(path, '/'); ++begin; int i; - for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ + for(i = 0; i < (int)strlen(begin) && i < n && begin[i] != '.'; ++i){ int index = alphanum_to_int(begin[i]); if(index > 35) printf("Bad %c\n", begin[i]); truth[i*NUMCHARS+index] = 1; @@ -1073,7 +1075,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*c; - float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale = 0; + float r1 = 0, r2 = 0, r3 = 0, r4 = 0; //, r_scale = 0; float resize_r1 = 0, resize_r2 = 0; float dhue = 0, dsat = 0, dexp = 0, flip = 0, blur = 0; int augmentation_calculated = 0, gaussian_noise = 0; @@ -1132,7 +1134,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo flip = use_flip ? random_gen() % 2 : 0; } - r_scale = random_float(); + //r_scale = random_float(); if (!contrastive || contrastive_color || i % 2 == 0) { @@ -1286,7 +1288,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo const int bot_shift = min_val_cmp(h - cut_y[i], max_val_cmp(0, (-pbot*h / oh))); - int k, x, y; + int k, y; for (k = 0; k < c; ++k) { for (y = 0; y < h; ++y) { int j = y*w + k*w*h; @@ -1396,7 +1398,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo d.X.vals = (float**)xcalloc(d.X.rows, sizeof(float*)); d.X.cols = h*w*c; - float r1 = 0, r2 = 0, r3 = 0, r4 = 0, r_scale; + float r1 = 0, r2 = 0, r3 = 0, r4 = 0; //, r_scale; float resize_r1 = 0, resize_r2 = 0; float dhue = 0, dsat = 0, dexp = 0, flip = 0; int augmentation_calculated = 0; @@ -1442,7 +1444,7 @@ data load_data_detection(int n, char **paths, int m, int w, int h, int c, int bo flip = use_flip ? 
random_gen() % 2 : 0; } - r_scale = random_float(); + // r_scale = random_float(); if (!contrastive || contrastive_color || i % 2 == 0) { diff --git a/src/demo.c b/src/demo.c index 5a01faf36e0..8d96f4a72d7 100644 --- a/src/demo.c +++ b/src/demo.c @@ -97,7 +97,7 @@ void *detect_in_thread(void *ptr) this_thread_yield(); } - layer l = net.layers[net.n - 1]; + // layer l = net.layers[net.n - 1]; float *X = det_s.data; //float *prediction = network_predict(net, X); diff --git a/src/detector.c b/src/detector.c index 0fc36142904..2b6da57cec4 100644 --- a/src/detector.c +++ b/src/detector.c @@ -61,7 +61,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i char *base = basecfg(cfgfile); printf("%s\n", base); float avg_loss = -1; +#ifdef OPENCV float avg_contrastive_acc = 0; +#endif network* nets = (network*)xcalloc(ngpus, sizeof(network)); srand(time(0)); @@ -324,8 +326,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i } printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images, %f hours left\n", iteration, loss, avg_loss, get_current_rate(net), (what_time_is_it_now() - time), iteration*imgs, avg_time); fflush(stdout); - - int draw_precision = 0; +#ifdef OPENCV + int draw_precision = 0; +#endif if (calc_map && (iteration >= next_map_calc || iteration == net.max_batches)) { if (l.random) { printf("Resizing to initial size: %d x %d ", init_w, init_h); @@ -371,8 +374,9 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i sprintf(buff, "%s/%s_best.weights", backup_directory, base); save_weights(net, buff); } - +#ifdef OPENCV draw_precision = 1; +#endif } time_remaining = ((net.max_batches - iteration) / ngpus)*(what_time_is_it_now() - time + load_time) / 60 / 60; // set initial value, even if resume training from 10000 iteration @@ -454,7 +458,7 @@ void train_detector(char *datacfg, char *cfgfile, char *weightfile, int *gpus, i } } - +#if 0 static int get_coco_image_id(char *filename) { char *p = strrchr(filename, '/'); @@ -462,6 +466,7 @@ static int get_coco_image_id(char *filename) if (c) p = c; return atoi(p + 1); } +#endif static void print_cocos(FILE *fp, char *image_path, detection *dets, int num_boxes, int classes, int w, int h) { @@ -582,7 +587,7 @@ static void eliminate_bdd(char *buf, char *a) { if (a[++n] == '\0') { - for (k; buf[k + n] != '\0'; k++) + for (; buf[k + n] != '\0'; k++) { buf[k] = buf[k + n]; } @@ -701,9 +706,9 @@ void validate_detector(char *datacfg, char *cfgfile, char *weightfile, char *out if (!outfile) outfile = "kitti_results"; printf("%s\n", outfile); snprintf(buff, 1024, "%s/%s", prefix, outfile); - int mkd = make_directory(buff, 0777); + (void)make_directory(buff, 0777); snprintf(buff2, 1024, "%s/%s/data", prefix, outfile); - int mkd2 = make_directory(buff2, 0777); + (void)make_directory(buff2, 0777); kitti = 1; } else if (0 == strcmp(type, "imagenet")) { @@ -1320,13 +1325,12 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa for (point = 0; point < map_points; ++point) { double cur_recall = point * 1.0 / (map_points-1); double cur_precision = 0; - double cur_prob = 0; for (rank = 0; rank < detections_count; ++rank) { if (pr[i][rank].recall >= cur_recall) { // > or >= if (pr[i][rank].precision > cur_precision) { cur_precision = pr[i][rank].precision; - cur_prob = pr[i][rank].prob; + // cur_prob = pr[i][rank].prob; } } } @@ -1340,8 +1344,8 @@ float validate_detector_map(char *datacfg, char *cfgfile, char *weightfile, floa 
printf("class_id = %d, name = %s, ap = %2.2f%% \t (TP = %d, FP = %d) \n", i, names[i], avg_precision * 100, tp_for_thresh_per_class[i], fp_for_thresh_per_class[i]); - float class_precision = (float)tp_for_thresh_per_class[i] / ((float)tp_for_thresh_per_class[i] + (float)fp_for_thresh_per_class[i]); - float class_recall = (float)tp_for_thresh_per_class[i] / ((float)tp_for_thresh_per_class[i] + (float)(truth_classes_count[i] - tp_for_thresh_per_class[i])); + //float class_precision = (float)tp_for_thresh_per_class[i] / ((float)tp_for_thresh_per_class[i] + (float)fp_for_thresh_per_class[i]); + //float class_recall = (float)tp_for_thresh_per_class[i] / ((float)tp_for_thresh_per_class[i] + (float)(truth_classes_count[i] - tp_for_thresh_per_class[i])); //printf("Precision = %1.2f, Recall = %1.2f, avg IOU = %2.2f%% \n\n", class_precision, class_recall, avg_iou_per_class[i]); mean_average_precision += avg_precision; @@ -1639,7 +1643,7 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam name_list, names_size, net.layers[net.n - 1].classes, cfgfile); } srand(2222222); - char buff[256]; + char buff[256] = { 0 }; char *input = buff; char *json_buf = NULL; int json_image_id = 0; @@ -1656,14 +1660,14 @@ void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filenam float nms = .45; // 0.4F while (1) { if (filename) { - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); if (strlen(input) > 0) if (input[strlen(input) - 1] == 0x0d) input[strlen(input) - 1] = 0; } else { printf("Enter Image Path: "); fflush(stdout); - input = fgets(input, 256, stdin); + input = fgets(input, sizeof buff - 1, stdin); if (!input) break; strtok(input, "\n"); } @@ -1800,14 +1804,14 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, } srand(2222222); - char buff[256]; + char buff[256] = { 0 }; char *input = buff; int j; float nms = .45; // 0.4F while (1) { if (filename) { - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); if (strlen(input) > 0) if (input[strlen(input) - 1] == 0x0d) input[strlen(input) - 1] = 0; } @@ -1869,7 +1873,7 @@ void draw_object(char *datacfg, char *cfgfile, char *weightfile, char *filename, float avg_loss = get_network_cost(net); draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, it_num, 0, 0, "mAP%", 0, dont_show, 0, 0); - float inv_loss = 1.0 / max_val_cmp(0.01, avg_loss); + //float inv_loss = 1.0 / max_val_cmp(0.01, avg_loss); //net.learning_rate = *lr_set * inv_loss; if (*boxonly) { diff --git a/src/dice.c b/src/dice.c index bb5d643796e..14d893b3e77 100644 --- a/src/dice.c +++ b/src/dice.c @@ -75,16 +75,16 @@ void test_dice(char *cfgfile, char *weightfile, char *filename) srand(2222222); int i = 0; char **names = dice_labels; - char buff[256]; + char buff[256] = { 0 }; char *input = buff; int indexes[6]; while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); }else{ printf("Enter Image Path: "); fflush(stdout); - input = fgets(input, 256, stdin); + input = fgets(input, sizeof buff - 1, stdin); if(!input) return; strtok(input, "\n"); } diff --git a/src/gaussian_yolo_layer.c b/src/gaussian_yolo_layer.c index f94f4a6ce44..3a8e201c8a2 100644 --- a/src/gaussian_yolo_layer.c +++ b/src/gaussian_yolo_layer.c @@ -67,14 +67,14 @@ layer make_gaussian_yolo_layer(int batch, int w, int h, int n, int total, int *m free(l.output); - if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs * sizeof(float), 
cudaHostRegisterMapped)) l.output_pinned = 1; + if (cudaSuccess == cudaHostAlloc((void**)&l.output, batch*l.outputs * sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1; else { cudaGetLastError(); // reset CUDA-error l.output = (float*)calloc(batch * l.outputs, sizeof(float)); } free(l.delta); - if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs * sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1; + if (cudaSuccess == cudaHostAlloc((void**)&l.delta, batch*l.outputs * sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1; else { cudaGetLastError(); // reset CUDA-error l.delta = (float*)calloc(batch * l.outputs, sizeof(float)); @@ -106,7 +106,7 @@ void resize_gaussian_yolo_layer(layer *l, int w, int h) if (l->output_pinned) { CHECK_CUDA(cudaFreeHost(l->output)); - if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { + if (cudaSuccess != cudaHostAlloc((void**)&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { cudaGetLastError(); // reset CUDA-error l->output = (float*)calloc(l->batch * l->outputs, sizeof(float)); l->output_pinned = 0; @@ -115,7 +115,7 @@ void resize_gaussian_yolo_layer(layer *l, int w, int h) if (l->delta_pinned) { CHECK_CUDA(cudaFreeHost(l->delta)); - if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { + if (cudaSuccess != cudaHostAlloc((void**)&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { cudaGetLastError(); // reset CUDA-error l->delta = (float*)calloc(l->batch * l->outputs, sizeof(float)); l->delta_pinned = 0; diff --git a/src/gemm.c b/src/gemm.c index 256061bbed9..300bde7d8cd 100644 --- a/src/gemm.c +++ b/src/gemm.c @@ -557,7 +557,10 @@ static inline float _mm256_extract_float32(__m256 a, const int index) { #include static inline float _dn_castu32_f32(uint32_t a) { - return *((float *)&a); + float b; + static_assert(sizeof(a) == sizeof(b), ""); + memcpy(&b, &a, sizeof(b)); + return b; } static inline float _mm256_extract_float32(__m256 a, const int index) { @@ -1047,13 +1050,13 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, //__m256i all256_last_zero = _mm256_set1_epi32(0xFFFFFFFF); //all256_last_zero.m256i_i32[7] = 0; - __m256i all256_last_zero = - _mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0); + // __m256i all256_last_zero = + // _mm256_set_epi32(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0x0); - __m256i idx256 = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); + // __m256i idx256 = _mm256_set_epi32(0, 7, 6, 5, 4, 3, 2, 1); //__m256 all256_sing1 = _mm256_set1_ps(0x80000000); - __m256 all256_one = _mm256_set1_ps(1); - __m256i all256i_one = _mm256_set1_epi32(1); + // __m256 all256_one = _mm256_set1_ps(1); + // __m256i all256i_one = _mm256_set1_epi32(1); ///__m256i src256 = _mm256_loadu_si256((__m256i *)(&src[i])); ///__m256i result256 = _mm256_and_si256(src256, all256_sing1); // check sign in 8 x 32-bit floats @@ -1073,7 +1076,7 @@ void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, for (x = 0; x < w-8; x+=8) { int const output_index = fil*w*h + y*w + x; - float sum = 0; + //float sum = 0; __m256 sum256 = _mm256_set1_ps(0); for (chan = 0; chan < c; ++chan) { @@ -1645,7 +1648,7 @@ void im2col_cpu_custom_bin(float* data_im, // optimized version if (height_col == height && width_col == width && stride == 1 && pad == 1 && 
is_fma_avx2()) { - __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); + // __m256i all256_sing1 = _mm256_set_epi32(0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000, 0x80000000); __m256 float_zero256 = _mm256_set1_ps(0.00); int new_ldb = bit_align; @@ -2038,10 +2041,11 @@ void gemm_nn_bin_32bit_packed(int M, int N, int K, float ALPHA, void convolution_2d(int w, int h, int ksize, int n, int c, int pad, int stride, float *weights, float *input, float *output, float *mean) { +#if 0 const int out_h = (h + 2 * pad - ksize) / stride + 1; // output_height=input_height for stride=1 and pad=1 const int out_w = (w + 2 * pad - ksize) / stride + 1; // output_width=input_width for stride=1 and pad=1 //int i, f, j; - +#endif int fil; // filter index #pragma omp parallel for // "omp parallel for" - automatic parallelization of loop by using OpenMP diff --git a/src/http_stream.cpp b/src/http_stream.cpp index cac00a09df5..b1ddca8d36d 100644 --- a/src/http_stream.cpp +++ b/src/http_stream.cpp @@ -222,7 +222,7 @@ class JSON_sender #endif if (s == sock) // request on master socket, accept and send main header. { - SOCKADDR_IN address = { 0 }; + SOCKADDR_IN address = { }; SOCKET client = ::accept(sock, (SOCKADDR*)&address, &addrlen); if (client == SOCKET_ERROR) { @@ -250,7 +250,7 @@ class JSON_sender //"Content-Type: multipart/x-mixed-replace; boundary=boundary\r\n" "\r\n", 0); _write(client, "[\n", 0); // open JSON array - int n = _write(client, outputbuf, outlen); + (void)_write(client, outputbuf, outlen); cerr << "JSON_sender: new client " << client << endl; } else // existing client, just stream pix @@ -471,7 +471,7 @@ class MJPG_sender #endif if (s == sock) // request on master socket, accept and send main header. 
{ - SOCKADDR_IN address = { 0 }; + SOCKADDR_IN address = { }; SOCKET client = ::accept(sock, (SOCKADDR*)&address, &addrlen); if (client == SOCKET_ERROR) { @@ -863,7 +863,7 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim // copy detections from queue of vectors to the one vector std::vector old_dets; for (std::vector &v : old_dets_dq) { - for (int i = 0; i < v.size(); ++i) { + for (size_t i = 0; i < v.size(); ++i) { old_dets.push_back(v[i]); } } @@ -871,7 +871,7 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim std::vector sim_det(old_dets.size() * new_dets_num); // calculate similarity - for (int old_id = 0; old_id < old_dets.size(); ++old_id) { + for (size_t old_id = 0; old_id < old_dets.size(); ++old_id) { for (int new_id = 0; new_id < new_dets_num; ++new_id) { const int index = old_id*new_dets_num + new_id; const float sim = cosine_similarity(new_dets[new_id].embeddings, old_dets[old_id].embeddings, old_dets[0].embedding_size); @@ -890,7 +890,7 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim std::vector track_idx(new_track_id, 1); // match objects - for (int index = 0; index < new_dets_num*old_dets.size(); ++index) { + for (int index = 0; index < new_dets_num* (int)old_dets.size(); ++index) { const int new_id = sim_det[index].new_id; const int old_id = sim_det[index].old_id; const int track_id = old_dets[old_id].track_id; @@ -926,7 +926,7 @@ void set_track_id(detection *new_dets, int new_dets_num, float thresh, float sim // add new old_dets_dq.push_back(new_det_vec); // remove old - if (old_dets_dq.size() > deque_size) old_dets_dq.pop_front(); + if ((int)old_dets_dq.size() > deque_size) old_dets_dq.pop_front(); // remove detection which were detected only on few frames for (int i = 0; i < new_dets_num; ++i) { diff --git a/src/image_opencv.cpp b/src/image_opencv.cpp index 22e6ca53570..2508d82078f 100644 --- a/src/image_opencv.cpp +++ b/src/image_opencv.cpp @@ -942,6 +942,7 @@ extern "C" void draw_detections_cv_v3(mat_cv* mat, detection *dets, int num, flo float red = get_color(2, offset, classes); float green = get_color(1, offset, classes); float blue = get_color(0, offset, classes); +#if 0 float rgb[3]; //width = prob*20+2; @@ -949,6 +950,7 @@ extern "C" void draw_detections_cv_v3(mat_cv* mat, detection *dets, int num, flo rgb[0] = red; rgb[1] = green; rgb[2] = blue; +#endif box b = dets[i].bbox; if (std::isnan(b.w) || std::isinf(b.w)) b.w = 0.5; if (std::isnan(b.h) || std::isinf(b.h)) b.h = 0.5; @@ -1416,18 +1418,18 @@ extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int int it_trackbar_value = 200; std::string const it_trackbar_name = "iterations"; - int it_tb_res = cv::createTrackbar(it_trackbar_name, window_name, &it_trackbar_value, 1000); + (void)cv::createTrackbar(it_trackbar_name, window_name, &it_trackbar_value, 1000); int lr_trackbar_value = 10; std::string const lr_trackbar_name = "learning_rate exp"; - int lr_tb_res = cv::createTrackbar(lr_trackbar_name, window_name, &lr_trackbar_value, 20); + (void)cv::createTrackbar(lr_trackbar_name, window_name, &lr_trackbar_value, 20); int cl_trackbar_value = 0; std::string const cl_trackbar_name = "class_id"; - int cl_tb_res = cv::createTrackbar(cl_trackbar_name, window_name, &cl_trackbar_value, classes-1); + (void)cv::createTrackbar(cl_trackbar_name, window_name, &cl_trackbar_value, classes-1); std::string const bo_trackbar_name = "box-only"; - int bo_tb_res = cv::createTrackbar(bo_trackbar_name, 
window_name, boxonly, 1); + (void)cv::createTrackbar(bo_trackbar_name, window_name, boxonly, 1); int i = 0; @@ -1440,7 +1442,7 @@ extern "C" void cv_draw_object(image sized, float *truth_cpu, int max_boxes, int if (pressed_key == 27 || pressed_key == 1048603) break;// break; // ESC - save & exit frame_clone = frame.clone(); - char buff[100]; + //char buff[100]; std::string lr_value = "learning_rate = " + std::to_string(1.0 / pow(2, lr_trackbar_value)); cv::putText(frame_clone, lr_value, cv::Point2i(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(10, 50, 10), 3); cv::putText(frame_clone, lr_value, cv::Point2i(10, 20), cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(20, 120, 60), 2); diff --git a/src/layer.c b/src/layer.c index 758644b567b..517537dc9cf 100644 --- a/src/layer.c +++ b/src/layer.c @@ -205,7 +205,7 @@ void free_layer_custom(layer l, int keep_cudnn_desc) if (l.output_avg_gpu) cuda_free(l.output_avg_gpu), l.output_avg_gpu = NULL; if (l.activation_input_gpu) cuda_free(l.activation_input_gpu), l.activation_input_gpu = NULL; } - if (l.delta_gpu && (l.optimized_memory < 1 || l.keep_delta_gpu && l.optimized_memory < 3)) cuda_free(l.delta_gpu), l.delta_gpu = NULL; + if (l.delta_gpu && (l.optimized_memory < 1 || (l.keep_delta_gpu && l.optimized_memory < 3))) cuda_free(l.delta_gpu), l.delta_gpu = NULL; if (l.cos_sim_gpu) cuda_free(l.cos_sim_gpu); if (l.rand_gpu) cuda_free(l.rand_gpu); if (l.squared_gpu) cuda_free(l.squared_gpu); diff --git a/src/maxpool_layer_kernels.cu b/src/maxpool_layer_kernels.cu index 1d0d1bd9d2d..6a1d69aa3cc 100644 --- a/src/maxpool_layer_kernels.cu +++ b/src/maxpool_layer_kernels.cu @@ -202,7 +202,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta } if (layer.antialiasing) { - network_state s = { 0 }; + network_state s = { }; s.train = state.train; s.workspace = state.workspace; s.net = state.net; @@ -217,7 +217,7 @@ extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state sta extern "C" void backward_maxpool_layer_gpu(maxpool_layer layer, network_state state) { if (layer.antialiasing) { - network_state s = { 0 }; + network_state s = { }; s.train = state.train; s.workspace = state.workspace; s.net = state.net; diff --git a/src/network.c b/src/network.c index 40c1cbcb72b..0d2d17c1c54 100644 --- a/src/network.c +++ b/src/network.c @@ -244,7 +244,7 @@ network make_network(int n) network net = {0}; net.n = n; net.layers = (layer*)xcalloc(net.n, sizeof(layer)); - net.seen = (uint64_t*)xcalloc(1, sizeof(uint64_t)); + net.seen = (int64_t*)xcalloc(1, sizeof(int64_t)); net.cuda_graph_ready = (int*)xcalloc(1, sizeof(int)); net.badlabels_reject_threshold = (float*)xcalloc(1, sizeof(float)); net.delta_rolling_max = (float*)xcalloc(1, sizeof(float)); @@ -260,8 +260,8 @@ network make_network(int n) net.input16_gpu = (float**)xcalloc(1, sizeof(float*)); net.output16_gpu = (float**)xcalloc(1, sizeof(float*)); - net.max_input16_size = (size_t*)xcalloc(1, sizeof(size_t)); - net.max_output16_size = (size_t*)xcalloc(1, sizeof(size_t)); + net.max_input16_size = xcalloc(1, sizeof(ssize_t)); + net.max_output16_size = xcalloc(1, sizeof(ssize_t)); #endif return net; } @@ -432,7 +432,7 @@ float train_network_waitkey(network net, data d, int wait_key) if (net.ema_alpha && (*net.cur_iteration) >= ema_start_point) { - int ema_period = (net.max_batches - ema_start_point - 1000) * (1.0 - net.ema_alpha); + // int ema_period = (net.max_batches - ema_start_point - 1000) * (1.0 - net.ema_alpha); int ema_apply_point = net.max_batches - 1000; if 
(!is_ema_initialized(net)) @@ -503,7 +503,7 @@ int recalculate_workspace_size(network *net) if (gpu_index >= 0) cuda_free(net->workspace); #endif int i; - size_t workspace_size = 0; + int workspace_size = 0; for (i = 0; i < net->n; ++i) { layer l = net->layers[i]; //printf(" %d: layer = %d,", i, l.type); @@ -580,7 +580,7 @@ int resize_network(network *net, int w, int h) net->w = w; net->h = h; int inputs = 0; - size_t workspace_size = 0; + int workspace_size = 0; //fprintf(stderr, "Resizing to %d x %d...\n", w, h); //fflush(stderr); for (i = 0; i < net->n; ++i){ @@ -657,7 +657,7 @@ int resize_network(network *net, int w, int h) printf(" try to allocate additional workspace_size = %1.2f MB \n", (float)workspace_size / 1000000); net->workspace = cuda_make_array(0, workspace_size/sizeof(float) + 1); net->input_state_gpu = cuda_make_array(0, size); - if (cudaSuccess == cudaHostAlloc(&net->input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) + if (cudaSuccess == cudaHostAlloc((void**)&net->input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net->input_pinned_cpu_flag = 1; else { cudaGetLastError(); // reset CUDA-error @@ -1281,10 +1281,12 @@ void free_network(network net) #endif } +#if 0 static float relu(float src) { if (src > 0) return src; return 0; } +#endif static float lrelu(float src) { const float eps = 0.001; @@ -1313,7 +1315,7 @@ void fuse_conv_batchnorm(network net) double precomputed = l->scales[f] / (sqrt((double)l->rolling_variance[f] + .00001)); - const size_t filter_size = l->size*l->size*l->c / l->groups; + const int filter_size = l->size*l->size*l->c / l->groups; int i; for (i = 0; i < filter_size; ++i) { int w_index = f*filter_size + i; diff --git a/src/parser.c b/src/parser.c index 65606de6e74..3cd37f0af53 100644 --- a/src/parser.c +++ b/src/parser.c @@ -1180,7 +1180,7 @@ void parse_net_options(list *options, network *net) net->loss_scale = option_find_float_quiet(options, "loss_scale", 1); net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0); net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0); - net->workspace_size_limit = (size_t)1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024); // 1024 MB by default + net->workspace_size_limit = 1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024); // 1024 MB by default net->adam = option_find_int_quiet(options, "adam", 0); @@ -1361,7 +1361,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) #ifdef GPU printf("net.optimized_memory = %d \n", net.optimized_memory); if (net.optimized_memory >= 2 && params.train) { - pre_allocate_pinned_memory((size_t)1024 * 1024 * 1024 * 8); // pre-allocate 8 GB CPU-RAM for pinned memory + pre_allocate_pinned_memory(1024L * 1024 * 1024 * 8); // pre-allocate 8 GB CPU-RAM for pinned memory } #endif // GPU @@ -1383,9 +1383,9 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) int avg_outputs = 0; int avg_counter = 0; float bflops = 0; - size_t workspace_size = 0; - size_t max_inputs = 0; - size_t max_outputs = 0; + int workspace_size = 0; + int max_inputs = 0; + int max_outputs = 0; int receptive_w = 1, receptive_h = 1; int receptive_w_scale = 1, receptive_h_scale = 1; const int show_receptive_field = option_find_float_quiet(options, "show_receptive_field", 0); @@ -1711,7 +1711,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) layer l = net.layers[k]; // delta GPU-memory optimization: 
net.optimized_memory == 1 if (!l.keep_delta_gpu) { - const size_t delta_size = l.outputs*l.batch; // l.steps + const int delta_size = l.outputs*l.batch; // l.steps if (net.max_delta_gpu_size < delta_size) { net.max_delta_gpu_size = delta_size; if (net.global_delta_gpu) cuda_free(net.global_delta_gpu); @@ -1758,7 +1758,7 @@ network parse_network_cfg_custom(char *filename, int batch, int time_steps) { int size = get_network_input_size(net) * net.batch; net.input_state_gpu = cuda_make_array(0, size); - if (cudaSuccess == cudaHostAlloc(&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1; + if (cudaSuccess == cudaHostAlloc((void**)&net.input_pinned_cpu, size * sizeof(float), cudaHostRegisterMapped)) net.input_pinned_cpu_flag = 1; else { cudaGetLastError(); // reset CUDA-error net.input_pinned_cpu = (float*)xcalloc(size, sizeof(float)); @@ -1890,7 +1890,6 @@ void save_implicit_weights(layer l, FILE *fp) //printf("\n pull_implicit_layer \n"); } #endif - int i; //if(l.weight_updates) for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weight_updates[i]); //printf(" l.nweights = %d - update \n", l.nweights); //for (i = 0; i < l.nweights; ++i) printf(" %f, ", l.weights[i]); diff --git a/src/region_layer.c b/src/region_layer.c index 506cc9fc69b..65f599ceab0 100644 --- a/src/region_layer.c +++ b/src/region_layer.c @@ -59,8 +59,8 @@ region_layer make_region_layer(int batch, int w, int h, int n, int classes, int void resize_region_layer(layer *l, int w, int h) { #ifdef GPU - int old_w = l->w; - int old_h = l->h; + // int old_w = l->w; + // int old_h = l->h; #endif l->w = w; l->h = h; diff --git a/src/rnn.c b/src/rnn.c index ef2c7cca737..db87f0903cd 100644 --- a/src/rnn.c +++ b/src/rnn.c @@ -93,7 +93,7 @@ float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, si y[(j*batch + i)*characters + next] = 1; offsets[i] = (offsets[i] + 1) % len; - +#if 0 if(curr > 255 || curr <= 0 || next > 255 || next <= 0){ /*text[(index+j+2)%len] = 0; printf("%ld %d %d %d %d\n", index, j, len, (int)text[index+j], (int)text[index+j+1]); @@ -101,6 +101,7 @@ float_pair get_rnn_data(unsigned char *text, size_t *offsets, int characters, si */ error("Bad char", DARKNET_LOC); } +#endif } } float_pair p; diff --git a/src/shortcut_layer.c b/src/shortcut_layer.c index 87f0d7e8d76..b6b334dd228 100644 --- a/src/shortcut_layer.c +++ b/src/shortcut_layer.c @@ -52,7 +52,7 @@ layer make_shortcut_layer(int batch, int n, int *input_layers, int* input_sizes, if (l.nweights > 0) { l.weights = (float*)calloc(l.nweights, sizeof(float)); - float scale = sqrt(2. / l.nweights); + // float scale = sqrt(2. 
/ l.nweights); for (i = 0; i < l.nweights; ++i) l.weights[i] = 1;// +0.01*rand_uniform(-1, 1);// scale*rand_uniform(-1, 1); // rand_normal(); if (train) l.weight_updates = (float*)calloc(l.nweights, sizeof(float)); diff --git a/src/softmax_layer.c b/src/softmax_layer.c index 6535c5780b3..848c7e8f86c 100644 --- a/src/softmax_layer.c +++ b/src/softmax_layer.c @@ -239,7 +239,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) for (w = 0; w < l.w; ++w) { // find truth with max prob (only 1 label even if mosaic is used) - float max_truth = 0; + // float max_truth = 0; int n; for (n = 0; n < l.classes; ++n) { const float truth_prob = state.truth[b*l.classes + n]; @@ -248,7 +248,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) if (truth_prob > truth_thresh) { //printf(" truth_prob = %f, max_truth = %f, n = %d; ", truth_prob, max_truth, n); - max_truth = truth_prob; + // max_truth = truth_prob; l.labels[b] = n; } } @@ -285,8 +285,8 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) int b2, n2, h2, w2; int contrast_p_index = 0; - const size_t step = l.batch*l.n*l.h*l.w; - size_t contrast_p_size = step; + const int step = l.batch*l.n*l.h*l.w; + int contrast_p_size = step; if (!l.detection) contrast_p_size = l.batch*l.batch; contrastive_params *contrast_p = (contrastive_params*)xcalloc(contrast_p_size, sizeof(contrastive_params)); @@ -319,7 +319,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) const int time_step_j = b2 / mini_batch; if (time_step_i != time_step_j) continue; - const size_t step = l.batch*l.n*l.h*l.w; + const int step = l.batch*l.n*l.h*l.w; const float sim = cosine_similarity(z[z_index], z[z_index2], l.embedding_size); const float exp_sim = expf(sim / l.temperature); @@ -412,7 +412,7 @@ void forward_contrastive_layer(contrastive_layer l, network_state state) */ - const size_t contr_size = contrast_p_index; + const int contr_size = contrast_p_index; if (l.detection) { #ifdef GPU diff --git a/src/super.c b/src/super.c index 35e7f6cf806..82232f0fd2c 100644 --- a/src/super.c +++ b/src/super.c @@ -81,11 +81,11 @@ void test_super(char *cfgfile, char *weightfile, char *filename) srand(2222222); clock_t time; - char buff[256]; + char buff[256] = { 0 }; char *input = buff; while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, 255); }else{ printf("Enter Image Path: "); fflush(stdout); diff --git a/src/tag.c b/src/tag.c index c1e031b2c8e..c17431a2b3b 100644 --- a/src/tag.c +++ b/src/tag.c @@ -101,12 +101,12 @@ void test_tag(char *cfgfile, char *weightfile, char *filename) char **names = get_labels("data/tags.txt"); clock_t time; int indexes[10]; - char buff[256]; + char buff[256] = { 0 }; char *input = buff; int size = net.w; while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, 255); }else{ printf("Enter Image Path: "); fflush(stdout); diff --git a/src/utils.c b/src/utils.c index 2ad33d5e5ae..3750a8d54ca 100644 --- a/src/utils.c +++ b/src/utils.c @@ -209,7 +209,10 @@ void find_replace(const char* str, char* orig, char* rep, char* output) char* buffer = (char*)calloc(8192, sizeof(char)); char *p; - sprintf(buffer, "%s", str); + if (!buffer) + abort(); + + snprintf(buffer, 8191, "%s", str); if (!(p = strstr(buffer, orig))) { // Is 'orig' even in 'str'? 
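The string handling in super.c and tag.c above, and in utils.c and voxel.c just below, moves from unbounded sprintf and full-size strncpy calls to zero-initialized buffers, copies that leave room for the terminating NUL, and snprintf bounded by the buffer size. A short sketch of that pattern, assuming an illustrative 256-byte buffer and file name:

#include <cstdio>
#include <cstring>

int main()
{
    const char *filename = "data/dog.jpg";         // illustrative input path

    char buff[256] = { 0 };                        // zero-filled, so any bounded copy below
                                                   // is guaranteed to stay NUL-terminated
    std::strncpy(buff, filename, sizeof buff - 1); // copy at most 255 bytes, last byte stays 0

    char out[256];
    std::snprintf(out, sizeof out, "%s_%05d_l", buff, 42); // never writes past out and
                                                           // always terminates the string
    std::puts(out);
    return 0;
}

Tying the bound to the real buffer size keeps the limit correct if the buffer ever changes.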
sprintf(output, "%s", buffer); free(buffer); @@ -255,11 +258,14 @@ void find_replace_extension(char *str, char *orig, char *rep, char *output) { char* buffer = (char*)calloc(8192, sizeof(char)); - sprintf(buffer, "%s", str); + if (!buffer) + abort(); + + snprintf(buffer, 8191, "%s", str); char *p = strlaststr(buffer, orig); int offset = (p - buffer); int chars_from_end = strlen(buffer) - offset; - if (!p || chars_from_end != strlen(orig)) { // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'? + if (!p || chars_from_end != (int)strlen(orig)) { // Is 'orig' even in 'str' AND is 'orig' found at the end of 'str'? sprintf(output, "%s", buffer); free(buffer); return; @@ -1090,7 +1096,7 @@ unsigned long custom_hash(char *str) unsigned long hash = 5381; int c; - while (c = *str++) + while ((c = *str++)) hash = ((hash << 5) + hash) + c; /* hash * 33 + c */ return hash; diff --git a/src/voxel.c b/src/voxel.c index 9f50112be9a..7f927b6b334 100644 --- a/src/voxel.c +++ b/src/voxel.c @@ -22,10 +22,10 @@ void extract_voxel(char *lfile, char *rfile, char *prefix) } image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h); image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h); - char buff[256]; - sprintf(buff, "%s_%05d_l", prefix, count); + char buff[256] = { 0 }; + snprintf(buff, sizeof buff, "%s_%05d_l", prefix, count); save_image(ls, buff); - sprintf(buff, "%s_%05d_r", prefix, count); + snprintf(buff, sizeof buff, "%s_%05d_r", prefix, count); save_image(rs, buff); free_image(l); free_image(r); @@ -90,19 +90,19 @@ void train_voxel(char *cfgfile, char *weightfile) printf("%d: %f, %f avg, %f rate, %lf seconds, %d images\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time), i*imgs); if(i%1000==0){ - char buff[256]; - sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i); + char buff[256] = { 0 }; + snprintf(buff, sizeof buff, "%s/%s_%d.weights", backup_directory, base, i); save_weights(net, buff); } if(i%100==0){ - char buff[256]; - sprintf(buff, "%s/%s.backup", backup_directory, base); + char buff[256] = { 0 }; + snprintf(buff, sizeof buff, "%s/%s.backup", backup_directory, base); save_weights(net, buff); } free_data(train); } - char buff[256]; - sprintf(buff, "%s/%s_final.weights", backup_directory, base); + char buff[256] = { 0 }; + snprintf(buff, sizeof buff, "%s/%s_final.weights", backup_directory, base); save_weights(net, buff); } @@ -116,15 +116,15 @@ void test_voxel(char *cfgfile, char *weightfile, char *filename) srand(2222222); clock_t time; - char buff[256]; + char buff[256] = { 0 }; char *input = buff; while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); }else{ printf("Enter Image Path: "); fflush(stdout); - input = fgets(input, 256, stdin); + input = fgets(input, sizeof buff - 1, stdin); if(!input) return; strtok(input, "\n"); } diff --git a/src/writing.c b/src/writing.c index 1fed538fa14..2f9f78772c9 100644 --- a/src/writing.c +++ b/src/writing.c @@ -90,15 +90,15 @@ void test_writing(char *cfgfile, char *weightfile, char *filename) set_batch_network(&net, 1); srand(2222222); clock_t time; - char buff[256]; + char buff[256] = { 0 }; char *input = buff; while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); }else{ printf("Enter Image Path: "); fflush(stdout); - input = fgets(input, 256, stdin); + input = fgets(input, sizeof buff - 1, stdin); if(!input) return; strtok(input, "\n"); } diff --git a/src/yolo.c b/src/yolo.c index ef68acabc51..a3ca2eb6444 
100644 --- a/src/yolo.c +++ b/src/yolo.c @@ -294,7 +294,7 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) detection_layer l = net.layers[net.n-1]; set_batch_network(&net, 1); srand(2222222); - char buff[256]; + char buff[256] = { 0 }; char *input = buff; int j; float nms=.4; @@ -305,11 +305,11 @@ void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh) } while(1){ if(filename){ - strncpy(input, filename, 256); + strncpy(input, filename, sizeof buff - 1); } else { printf("Enter Image Path: "); fflush(stdout); - input = fgets(input, 256, stdin); + input = fgets(input, sizeof buff - 1, stdin); if(!input) return; strtok(input, "\n"); } diff --git a/src/yolo_console_dll.cpp b/src/yolo_console_dll.cpp index 83df369657e..cf049790728 100644 --- a/src/yolo_console_dll.cpp +++ b/src/yolo_console_dll.cpp @@ -180,7 +180,7 @@ std::vector get_3d_coordinates(std::vector bbox_vect, cv::Mat xy void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector obj_names, int current_det_fps = -1, int current_cap_fps = -1) { - int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; + // int const colors[6][3] = { { 1,0,1 },{ 0,0,1 },{ 0,1,1 },{ 0,1,0 },{ 1,1,0 },{ 1,0,0 } }; for (auto &i : result_vec) { cv::Scalar color = obj_id_to_color(i.obj_id); @@ -189,7 +189,7 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector 0) obj_name += " - " + std::to_string(i.track_id); cv::Size const text_size = getTextSize(obj_name, cv::FONT_HERSHEY_COMPLEX_SMALL, 1.2, 2, 0); - int max_width = (text_size.width > i.w + 2) ? text_size.width : (i.w + 2); + int max_width = (text_size.width > (int)i.w + 2) ? text_size.width : (i.w + 2); max_width = std::max(max_width, (int)i.w + 2); //max_width = std::max(max_width, 283); std::string coords_3d; @@ -198,7 +198,7 @@ void draw_boxes(cv::Mat mat_img, std::vector result_vec, std::vector i.w + 2) ? text_size_3d.width : (i.w + 2); + int const max_width_3d = (text_size_3d.width > (int)i.w + 2) ? 
text_size_3d.width : (i.w + 2); if (max_width_3d > max_width) max_width = max_width_3d; } diff --git a/src/yolo_layer.c b/src/yolo_layer.c index de9d0990455..3077e85fde4 100644 --- a/src/yolo_layer.c +++ b/src/yolo_layer.c @@ -64,14 +64,14 @@ layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); free(l.output); - if (cudaSuccess == cudaHostAlloc(&l.output, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1; + if (cudaSuccess == cudaHostAlloc((void**)&l.output, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.output_pinned = 1; else { cudaGetLastError(); // reset CUDA-error l.output = (float*)xcalloc(batch * l.outputs, sizeof(float)); } free(l.delta); - if (cudaSuccess == cudaHostAlloc(&l.delta, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1; + if (cudaSuccess == cudaHostAlloc((void**)&l.delta, batch*l.outputs*sizeof(float), cudaHostRegisterMapped)) l.delta_pinned = 1; else { cudaGetLastError(); // reset CUDA-error l.delta = (float*)xcalloc(batch * l.outputs, sizeof(float)); @@ -102,7 +102,7 @@ void resize_yolo_layer(layer *l, int w, int h) #ifdef GPU if (l->output_pinned) { CHECK_CUDA(cudaFreeHost(l->output)); - if (cudaSuccess != cudaHostAlloc(&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { + if (cudaSuccess != cudaHostAlloc((void**)&l->output, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { cudaGetLastError(); // reset CUDA-error l->output = (float*)xcalloc(l->batch * l->outputs, sizeof(float)); l->output_pinned = 0; @@ -111,7 +111,7 @@ void resize_yolo_layer(layer *l, int w, int h) if (l->delta_pinned) { CHECK_CUDA(cudaFreeHost(l->delta)); - if (cudaSuccess != cudaHostAlloc(&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { + if (cudaSuccess != cudaHostAlloc((void**)&l->delta, l->batch*l->outputs * sizeof(float), cudaHostRegisterMapped)) { cudaGetLastError(); // reset CUDA-error l->delta = (float*)xcalloc(l->batch * l->outputs, sizeof(float)); l->delta_pinned = 0; @@ -422,7 +422,7 @@ void *process_batch(void* ptr) const int stride = l.w * l.h; box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w * l.h, l.new_coords); float best_match_iou = 0; - int best_match_t = 0; + // int best_match_t = 0; float best_iou = 0; int best_t = 0; for (t = 0; t < l.max_boxes; ++t) { @@ -442,7 +442,7 @@ void *process_batch(void* ptr) float iou = box_iou(pred, truth); if (iou > best_match_iou && class_id_match == 1) { best_match_iou = iou; - best_match_t = t; + // best_match_t = t; } if (iou > best_iou) { best_iou = iou; @@ -663,11 +663,11 @@ void forward_yolo_layer(const layer l, network_state state) { //int i, j, b, t, n; memcpy(l.output, state.input, l.outputs*l.batch * sizeof(float)); - int b, n; + int b; #ifndef GPU for (b = 0; b < l.batch; ++b) { - for (n = 0; n < l.n; ++n) { + for (int n = 0; n < l.n; ++n) { int bbox_index = entry_index(l, b, n*l.w*l.h, 0); if (l.new_coords) { //activate_array(l.output + bbox_index, 4 * l.w*l.h, LOGISTIC); // x,y,w,h @@ -691,18 +691,18 @@ void forward_yolo_layer(const layer l, network_state state) for (i = 0; i < l.batch * l.w*l.h*l.n; ++i) l.class_ids[i] = -1; //float avg_iou = 0; float tot_iou = 0; - float tot_giou = 0; - float tot_diou = 0; - float tot_ciou = 0; + // float tot_giou = 0; + // float tot_diou = 0; + // float tot_ciou = 0; float tot_iou_loss = 0; float tot_giou_loss = 0; - float 
tot_diou_loss = 0; - float tot_ciou_loss = 0; - float recall = 0; - float recall75 = 0; - float avg_cat = 0; - float avg_obj = 0; - float avg_anyobj = 0; + // float tot_diou_loss = 0; + // float tot_ciou_loss = 0; + // float recall = 0; + // float recall75 = 0; + // float avg_cat = 0; + // float avg_obj = 0; + // float avg_anyobj = 0; int count = 0; int class_count = 0; *(l.cost) = 0; diff --git a/src/yolo_v2_class.cpp b/src/yolo_v2_class.cpp index 19d381090ad..11b58487dd4 100644 --- a/src/yolo_v2_class.cpp +++ b/src/yolo_v2_class.cpp @@ -433,24 +433,24 @@ LIB_API std::vector Detector::tracking_id(std::vector cur_bbox_v if (i.size() > 0) prev_track_id_present = true; if (!prev_track_id_present) { - for (size_t i = 0; i < cur_bbox_vec.size(); ++i) + for (int i = 0; i < (int)cur_bbox_vec.size(); ++i) cur_bbox_vec[i].track_id = det_gpu.track_id[cur_bbox_vec[i].obj_id]++; prev_bbox_vec_deque.push_front(cur_bbox_vec); - if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); + if ((int)prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); return cur_bbox_vec; } - std::vector dist_vec(cur_bbox_vec.size(), std::numeric_limits::max()); + std::vector dist_vec(cur_bbox_vec.size(), std::numeric_limits::max()); for (auto &prev_bbox_vec : prev_bbox_vec_deque) { for (auto &i : prev_bbox_vec) { int cur_index = -1; - for (size_t m = 0; m < cur_bbox_vec.size(); ++m) { + for (int m = 0; m < (int)cur_bbox_vec.size(); ++m) { bbox_t const& k = cur_bbox_vec[m]; if (i.obj_id == k.obj_id) { float center_x_diff = (float)(i.x + i.w/2) - (float)(k.x + k.w/2); float center_y_diff = (float)(i.y + i.h/2) - (float)(k.y + k.h/2); - unsigned int cur_dist = sqrt(center_x_diff*center_x_diff + center_y_diff*center_y_diff); + int cur_dist = sqrt(center_x_diff*center_x_diff + center_y_diff*center_y_diff); if (cur_dist < max_dist && (k.track_id == 0 || dist_vec[m] > cur_dist)) { dist_vec[m] = cur_dist; cur_index = m; @@ -475,7 +475,7 @@ LIB_API std::vector Detector::tracking_id(std::vector cur_bbox_v if (change_history) { prev_bbox_vec_deque.push_front(cur_bbox_vec); - if (prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); + if ((int)prev_bbox_vec_deque.size() > frames_story) prev_bbox_vec_deque.pop_back(); } return cur_bbox_vec;
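Many of the remaining hunks in yolo_layer.c and yolo_v2_class.cpp either comment out accumulators that are never read or add int casts so that container .size() values compare against signed counters without -Wsign-compare warnings. A small sketch of why the cast matters, using a hypothetical frames_story sentinel of -1:

#include <cstdio>
#include <vector>

int main()
{
    std::vector<int> v = { 1, 2, 3 };
    int frames_story = -1;                    // hypothetical sentinel; negative values are the risky case

    // "v.size() > frames_story" would convert -1 to SIZE_MAX, so the test is
    // silently false as well as triggering -Wsign-compare.
    if ((int)v.size() > frames_story)         // the cast used throughout this patch
        std::puts("deque longer than history");

    for (int i = 0; i < (int)v.size(); ++i)   // same pattern for signed loop counters
        std::printf("%d\n", v[i]);
    return 0;
}

The set_track_id hunks in http_stream.cpp take the other route and switch the loop index itself to size_t; either choice silences the warning, and the patch picks one or the other per call site.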