main.c
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include "tokenizers_c.h"
#include "cJSON.h"
#include "onnxruntime_c_api.h"
#include <omp.h>
#include <mqueue.h>
#include <pthread.h>
// Project includes (from the include/ folder)
#include "preprocessor.h"
#include "read_data.h"
#include "postprocessor.h"
#include "model.h"
#include "tokenizer.h"
#include "paths.h"
#include "configs.h"
#include "parallel_processor.h"
// Global variables holding the input data
char** texts = NULL; // Array of strings containing texts to classify
size_t num_texts = 0; // Number of texts in the 'texts' array
char*** labels = NULL; // An array of labels for each text; there can be multiple labels for each text
size_t* num_labels = NULL; // Array containing the number of labels for each text
size_t num_labels_size = 0; // Total size of the array of labels
bool same_labels = false; // Flag indicating whether the same labels are used for all texts
char* classification_type = NULL; // Classification type (e.g. single-label, multi-label)
const OrtApi* g_ort = NULL; // Global pointer to ONNX Runtime API for performing model inference
// Mutex declarations
pthread_mutex_t queue_mutex;
// Buffers for input and output
OrtValue** input_ids_tensors = NULL;
OrtValue** attention_mask_tensors = NULL;
OrtValue** output_tensors = NULL;
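/*
 * Example input JSON (a minimal sketch; the exact key names are an assumption
 * inferred from the fields filled by parse_json, not a documented schema):
 *
 * {
 *   "texts": ["I love this movie!", "The service was awful."],
 *   "labels": [["positive", "negative"], ["positive", "negative"]],
 *   "same_labels": false,
 *   "classification_type": "single-label"
 * }
 */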
/**
* Main function that runs the text classification model using ONNX Runtime.
* It reads input data from a JSON file, preprocesses the texts, tokenizes them, runs inference using the ONNX model,
* and processes the output logits to print the classification results. It supports multi-threading using OpenMP.
*
* @param argc The number of command-line arguments.
* @param argv An array of command-line arguments: argv[1] is the path to the input JSON file,
*             argv[2] is the prompt_first flag ("true" or "false").
* @return 0 on success, or a non-zero value if an error occurs (e.g., invalid arguments or failed initialization).
*/
int main(int argc, char *argv[]) {
if (argc < 3) {
printf("Usage: %s /path/to/your_data.json [prompt_first: true/false]\n", argv[0]);
printf("NOTE: use this option only if you are sure that all required model parts are initialized correctly\n\n");
printf("Recommended option\n");
printf("Usage: ./run_GLiClass.sh knowledgator/gliclass-small-v1.0 /path/to/your_data.json\n");
printf("This option will automatically set up prompt_first for you\n");
return 1;
}
// Read input data from the JSON file
char* json_string = read_file(argv[1]);
if (!json_string) {
return 1;
}
bool prompt_first = string_to_bool(argv[2]);
///////////// Prepare inputs /////////////
parse_json(json_string, &texts, &num_texts, &labels, &num_labels, &num_labels_size, &same_labels, &classification_type);
free(json_string);
printf("DONE: parse_json;\n");
if (classification_type == NULL) {
fprintf(stderr, "Error: classification type is not provided\n");
return 1;
}
///////////// initializing part /////////////
TokenizerHandle tokenizer_handler = create_tokenizer(TOKENIZER_PATH);
if (!tokenizer_handler) {
return 1; // Error message is already reported inside create_tokenizer
}
printf("DONE: create_tokenizer;\n");
initialize_ort_api();
printf("DONE: initialize_ort_api;\n");
OrtEnv* env = initialize_ort_environment();
if (env == NULL) {
fprintf(stderr, "Error: Failed to initialize ONNX Runtime.\n");
return -1;
}
printf("DONE: initialize_ort_environment;\n");
OrtSession* session = create_ort_session(env, MODEL_PATH, NUM_THREADS);
if (session == NULL) {
fprintf(stderr, "Error: Failed to create ONNX Runtime session.\n");
g_ort->ReleaseEnv(env);
return -1;
}
printf("DONE: create_ort_session;\n\n");
/////////////////////////////////////////////////////////
//////////////////// INFERENCE START ////////////////////
// Initialize queue mutex
pthread_mutex_init(&queue_mutex, NULL);
// Allocate memory for tensors (one slot per batch)
size_t num_batches = (num_texts + BATCH_SIZE - 1) / BATCH_SIZE; // ceiling division
input_ids_tensors = malloc(sizeof(OrtValue*) * num_batches);
attention_mask_tensors = malloc(sizeof(OrtValue*) * num_batches);
output_tensors = malloc(sizeof(OrtValue*) * num_batches);
if (!input_ids_tensors || !attention_mask_tensors || !output_tensors) {
fprintf(stderr, "Error: Failed to allocate tensor buffers.\n");
return 1;
}
double start_time, end_time;
start_time = omp_get_wtime();
// Parallel preprocessing
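// parallel_preprocess is expected to mirror the commented-out per-batch loop below:
// for each batch it prepares the prompt strings, tokenizes them, and fills
// input_ids_tensors[i] and attention_mask_tensors[i] with the resulting ONNX tensors.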
parallel_preprocess(texts, labels, num_labels, num_texts,
same_labels, prompt_first, tokenizer_handler,
input_ids_tensors, attention_mask_tensors);
// #pragma omp parallel for schedule(dynamic)
// for (size_t i = 0; i < num_texts; i += BATCH_SIZE) {
// size_t current_batch_size = (i + BATCH_SIZE > num_texts) ? (num_texts - i) : BATCH_SIZE;
// // Prepare input data
// const char** batch_texts = (const char**)&texts[i];
// const char*** batch_labels = (const char***)(same_labels ? (void*)labels : (void*)&labels[i]);
// size_t* batch_num_labels = (same_labels) ? num_labels : &num_labels[i];
// // Prepare tokens
// const char** prepared_inputs = prepare_inputs(batch_texts, batch_labels, current_batch_size, batch_num_labels, same_labels, prompt_first);
// TokenizedInputs tokenized = tokenize_inputs(tokenizer_handler, prepared_inputs, current_batch_size, MAX_LENGTH);
// // Prepare input tensors
// prepare_input_tensors(&tokenized, &input_ids_tensors[i / BATCH_SIZE], &attention_mask_tensors[i / BATCH_SIZE]);
// // Clean up memory
// free_prepared_inputs((char**)prepared_inputs, current_batch_size);
// free_tokenized_inputs(&tokenized);
// }
// Inference stage - processing batches
#pragma omp parallel for schedule(dynamic)
for (size_t i = 0; i < num_batches; i++) {
#ifdef USE_CUDA // GPU
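// Serialize GPU inference: the queue mutex ensures that only one OpenMP thread
// submits a batch to the device at a time.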
pthread_mutex_lock(&queue_mutex);
output_tensors[i] = run_inference(session, input_ids_tensors[i], attention_mask_tensors[i]);
pthread_mutex_unlock(&queue_mutex);
#else
output_tensors[i] = run_inference(session, input_ids_tensors[i], attention_mask_tensors[i]);
#endif
}
// Postprocess stage - processing batches
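// parallel_postprocess is expected to mirror the commented-out per-batch loop below:
// it runs process_output_tensor on each batch's logits to print the classification
// results, and releases output_tensors[i] afterwards (hence no release in the cleanup loop).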
parallel_postprocess(output_tensors, num_batches, num_texts,
texts, labels, num_labels,
same_labels, num_labels_size, classification_type);
// #pragma omp parallel for schedule(dynamic)
// for (size_t i = 0; i < num_batches; i++) {
// size_t current_batch_size = (i == num_batches - 1) ? (num_texts - i * BATCH_SIZE) : BATCH_SIZE;
// const char** batch_texts = (const char**)&texts[i * BATCH_SIZE];
// const char*** batch_labels = (const char***)(same_labels ? (void*)labels : (void*)&labels[i * BATCH_SIZE]);
// size_t* batch_num_labels = (same_labels) ? num_labels : &num_labels[i * BATCH_SIZE];
// process_output_tensor(output_tensors[i], g_ort, same_labels, batch_labels, batch_num_labels, num_labels_size, THRESHOLD,
// current_batch_size, batch_texts, classification_type);
// // Free output tensor after processing
// g_ort->ReleaseValue(output_tensors[i]);
// }
end_time = omp_get_wtime();
printf("Execution time: %f seconds\n", end_time - start_time);
// Free resources
for (size_t i = 0; i < num_batches; i++) {
g_ort->ReleaseValue(input_ids_tensors[i]);
g_ort->ReleaseValue(attention_mask_tensors[i]);
// g_ort->ReleaseValue(output_tensors[i]);
}
free(input_ids_tensors);
free(attention_mask_tensors);
free(output_tensors);
// Free tokenizer
tokenizers_free(tokenizer_handler);
// Free onnx
g_ort->ReleaseSession(session);
g_ort->ReleaseEnv(env);
pthread_mutex_destroy(&queue_mutex);
return 0;
}