forked from libxsmm/parlooper
-
Notifications
You must be signed in to change notification settings - Fork 0
/
par_loop_cost_estimator.h
89 lines (78 loc) · 2.38 KB
/
par_loop_cost_estimator.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
/******************************************************************************
* Copyright (c) Intel Corporation - All rights reserved. *
* This file is part of the LIBXSMM library. *
* *
* For information on the license, see the LICENSE file. *
* Further information: https://github.com/libxsmm/libxsmm/ *
* SPDX-License-Identifier: BSD-3-Clause *
******************************************************************************/
#ifndef _PAR_LOOP_COST_ESIMATOR_H_
#define _PAR_LOOP_COST_ESIMATOR_H_
#include <string>
#include <vector>
// Kind of cost analysis to run. A value of this type is passed as the
// `analysis_type` argument of tensor_contraction_cost_estimator().
// NOTE(review): what each mode actually computes is decided by the
// estimator implementation, which is not visible in this header.
typedef enum cost_analysis_type {
  SINGLE_TRACE      = 1,
  PARALLEL_TRACES   = 2,
  CONCURRENT_TRACES = 3
} cost_analysis_type;
// Memory-hierarchy layout tag; stored in platform_spec_t::mem_hierarchy.
// The enumerator names suggest a two-level (L2 + L1) versus a three-level
// (LLC + L2 + L1) cache stack -- TODO confirm against the implementation.
typedef enum mem_hierarchy_type {
  MEM_L2_L1     = 1,
  MEM_LLC_L2_L1 = 2
} mem_hierarchy_type;
// Target platform selector, passed to set_platform_specs().
// CLX is the only platform declared in this header.
typedef enum platform_type {
  CLX = 1
} platform_type;
// Identifies one level of the memory hierarchy (L1, L2, LLC or MEM).
// The type is declared here but not used by any other declaration in
// this header.
typedef enum mem_hierarchy_loc {
  L1  = 1,
  L2  = 2,
  LLC = 3,
  MEM = 4
} mem_hierarchy_loc;
// Set of byte counters, one per memory-hierarchy category.
// NOTE(review): the field names suggest "bytes of traffic attributed to
// each level"; how they are filled is not visible in this header.
// NOTE(review): every counter is a plain `int`, so each value is limited
// to INT_MAX bytes.
typedef struct data_volume_t {
  int l1_bytes;             // counter for the L1 category
  int l2_bytes;             // counter for the L2 category
  int llc_bytes;            // counter for the LLC category
  int mem_bytes;            // counter for the MEM category
  int mixed_mem_llc_bytes;  // counter for the combined MEM/LLC category
} data_volume_t;
// Description of a target platform. A pointer to this struct is handed to
// set_platform_specs(), and a copy is passed to
// tensor_contraction_cost_estimator().
// NOTE(review): units below are taken from the field names only (bytes per
// cycle, bytes, GHz); confirm against the code that fills the struct.
typedef struct platform_spec_t {
  // Bandwidth figures, per the "_bytes_per_cycle" suffix.
  double mixed_mem_llc_bytes_per_cycle;
  double mem_bytes_per_cycle;
  double llc_bytes_per_cycle;
  double l2_bytes_per_cycle;
  double l1_bytes_per_cycle;

  // Capacity figures, per the "_size_in_bytes" suffix (stored as double).
  double llc_size_in_bytes;
  double l2_size_in_bytes;
  double l1_size_in_bytes;

  int n_threads;                     // thread count
  mem_hierarchy_type mem_hierarchy;  // hierarchy layout tag

  // Compute-throughput figures, per the "flops_per_cycle" naming.
  double flops_per_cycle_out_of_L1;
  double bf16_flops_per_cycle_out_of_L2;
  double fp32_flops_per_cycle_out_of_L2;

  double freq_in_ghz;  // clock frequency, per the "_in_ghz" suffix
} platform_spec_t;
// Problem description for one tensor contraction. A pointer to this struct
// is handed to set_tensor_metadata(), and a copy is passed to
// tensor_contraction_cost_estimator().
// NOTE(review): the meaning of M/N/K/brcount (presumably GEMM dimensions and
// a batch-reduce count) and the way the sub-tensor sizes are derived are not
// visible in this header -- confirm against the implementation.
typedef struct tensor_metadata_t {
  int subtensor_a_size_bytes;  // size of the A sub-tensor, per the name in bytes
  int subtensor_b_size_bytes;  // size of the B sub-tensor, per the name in bytes
  int subtensor_c_size_bytes;  // size of the C sub-tensor, per the name in bytes
  int M;
  int N;
  int K;
  int brcount;
  int dtype_size;  // presumably element size in bytes -- TODO confirm
} tensor_metadata_t;
// Declaration only; the definition lives outside this header.
//
//   platform        platform selector (CLX is the only value declared here)
//   n_threads       thread count
//   platform_specs  non-const pointer to the platform description; judging
//                   by the function name it is the output that gets filled
//                   in -- TODO confirm, including whether NULL is tolerated
void set_platform_specs(platform_type platform, int n_threads,
                        platform_spec_t* platform_specs);
// Declaration only; the definition lives outside this header.
//
//   M, N, K, brcount, dtype_size
//                    scalar inputs whose names match fields of
//                    tensor_metadata_t
//   tensor_metadata  non-const pointer to the metadata struct; judging by
//                    the function name it is the output that gets filled
//                    in -- TODO confirm, including how the
//                    subtensor_*_size_bytes fields are derived
void set_tensor_metadata(int M, int N, int K, int brcount, int dtype_size,
                         tensor_metadata_t* tensor_metadata);
// Declaration only; the definition lives outside this header.
//
//   analysis_type    which cost analysis to run
//   traces_array     non-const pointer to a vector of strings; the string
//                    format and whether the vector is modified are not
//                    visible here -- TODO confirm
//   tensor_metadata  problem description, passed by value
//   platform_spec    platform description, passed by value
//
// Returns a double. NOTE(review): the unit of the returned cost is not
// stated in this header.
double tensor_contraction_cost_estimator(
    cost_analysis_type analysis_type, std::vector<std::string>* traces_array,
    tensor_metadata_t tensor_metadata, platform_spec_t platform_spec);
#endif // _PAR_LOOP_COST_ESIMATOR_H_