-
Notifications
You must be signed in to change notification settings - Fork 8
/
set_union.cpp
88 lines (77 loc) · 2.33 KB
/
set_union.cpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
#include <cstdio>
#include <chrono>
#include "projectconfig.h"
#include "constants.h"
#include "shuffle_dictionary.hpp"
#include "union/naive.hpp"
#include "union/stl.hpp"
#include "union/branchless.hpp"
#ifdef __SSE4_1__
# include "union/sse.hpp"
#endif
#if defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512DQ__)
# include "union/avx512.hpp"
#endif
/**
 * Benchmarks one union implementation over every unordered pair of input lists.
 *
 * Calls func(lists[i], arraySize, lists[j], arraySize, out) for all
 * i < j < listCount, measures the wall clock time of the whole sweep and
 * prints it together with the sum of all values returned by func.
 *
 * @param lists  listCount arrays, each passed to func with length arraySize
 * @param func   union implementation under test; it receives both input
 *               lists with their lengths and one shared output buffer
 *               (presumably it returns the number of elements written -
 *               confirm against the union/ headers)
 */
void run(uint32_t **lists, size_t (*func)(const uint32_t*,size_t,const uint32_t*,size_t,uint32_t*)){
    // Room for 2*arraySize elements (two full input lists). The byte count is
    // rounded up to a multiple of the 64 byte alignment, because aligned_alloc
    // is only guaranteed to succeed for sizes that are a multiple of it.
    const size_t union_bytes = (2*arraySize*sizeof(uint32_t) + 63) / 64 * 64;
    uint32_t *union_list = static_cast<uint32_t*>(aligned_alloc(64, union_bytes));
    if(!union_list){
        // without an output buffer func would write through a null pointer
        puts("couldn't allocate the union output buffer");
        return;
    }

    const auto t_start = std::chrono::high_resolution_clock::now();

    // The output buffer is reused (overwritten) by every call; only the
    // returned values are accumulated and reported.
    size_t union_count=0;
    for(size_t i=0; i<listCount; ++i){
        for(size_t j=i+1; j<listCount; ++j){
            union_count += func(lists[i], arraySize, lists[j], arraySize, union_list);
        }
    }

    const auto t_end = std::chrono::high_resolution_clock::now();

    // %zu is the portable conversion for size_t (%lu is wrong wherever
    // size_t is not unsigned long)
    printf("Wall clock time passed: %10.2f ms - %zu\n",
        std::chrono::duration<double, std::milli>(t_end-t_start).count(),
        union_count
    );

    free(union_list);
}
/**
 * Benchmark driver.
 *
 * Reads listCount lists of arraySize 32 bit values each from "test.dat",
 * then passes every union implementation that is compiled in to run().
 *
 * @return 0 on success, -1 if test.dat cannot be opened, a list cannot be
 *         allocated, or the file does not contain enough data
 */
int main(){
    const auto t_start = std::chrono::high_resolution_clock::now();

    // load lists from file which was generated by genLists
    FILE *fd = fopen("test.dat", "rb");
    if(!fd){
        puts("couldn't open test.dat");
        return -1;
    }

    // Byte count rounded up to a multiple of the 64 byte alignment, because
    // aligned_alloc is only guaranteed to succeed for such sizes.
    const size_t list_bytes = (arraySize*sizeof(uint32_t) + 63) / 64 * 64;

    // Value-initialised (all null) so the cleanup loop at the end can free()
    // every slot even when loading stops half way through.
    uint32_t **lists = new uint32_t*[listCount]();
    bool loaded = true;
    for(size_t i=0; i<listCount; ++i){
        lists[i] = static_cast<uint32_t*>(aligned_alloc(64, list_bytes));
        if(!lists[i]){
            puts("couldn't allocate memory for the input lists");
            loaded = false;
            break;
        }
        // a short read would leave part of the list uninitialised and the
        // benchmarks would then run on garbage
        if(fread(lists[i], sizeof(uint32_t), arraySize, fd) != static_cast<size_t>(arraySize)){
            puts("couldn't read all lists from test.dat");
            loaded = false;
            break;
        }
    }
    fclose(fd);

    if(loaded){
        const auto t_end = std::chrono::high_resolution_clock::now();
        printf("preparing lists done - %f ms\n",
            std::chrono::duration<double, std::milli>(t_end-t_start).count()
        );

        puts("naive scalar union:");
        run(lists, union_scalar);
        puts("stl set_union:");
        run(lists, union_scalar_stl);
#if __GNUC__ >= 5
        //puts("stl parallel set_union: uses more than one core, just for reference here");
        //run(lists, union_scalar_stl_parallel);
#endif
#ifndef DISABLE_ASM
        puts("asm branchless scalar union:");
        run(lists, union_scalar_branchless);
#endif
#ifdef __SSE4_1__
        puts("SSE union:");
        run(lists, union_vector_sse);
#endif
#if defined(__AVX512F__) && defined(__AVX512CD__) && defined(__AVX512DQ__)
        puts("512bit AVX512");
        run(lists, union_vector_avx512_bitonic);
        puts("512bit AVX512 - 2");
        run(lists, union_vector_avx512_bitonic2);
#endif
    }

    // cleanup (free(nullptr) is a no-op, so never-allocated slots are fine)
    for(size_t i=0; i<listCount; ++i){
        free(lists[i]);
    }
    delete[] lists;

    return loaded ? 0 : -1;
}