This repository has been archived by the owner on Dec 16, 2024. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrocm_ucc_config
158 lines (155 loc) · 5.25 KB
/
rocm_ucc_config
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
### VERSION ###
# UCC version=1.4.0 revision 4c72697
# Configured with: --prefix=/usr --with-ucx=/usr --with-rocm=/opt/rocm --with-rocm-arch=--offload-arch=gfx1011 --with-tls=ucp
### BUILD ###
#define HAVE_ALLOCA 1
#define HAVE_ALLOCA_H 1
#define HAVE_ATTRIBUTE_NOOPTIMIZE 1
#define HAVE_DECL_ASPRINTF 1
#define HAVE_DECL_BASENAME 1
#define HAVE_DECL_FMEMOPEN 1
#define HAVE_DLFCN_H 1
#define HAVE_HIP 1
#define HAVE_HIP_HIP_RUNTIME_H 1
#define HAVE_HSA_EXT_AMD_H 1
#define HAVE_HSA_H 1
#define HAVE_INTTYPES_H 1
#define HAVE_LIBGEN_H 1
#define HAVE_LIBRT 1
#define HAVE_PROFILING_TL_UCP 1
#define HAVE_ROCM 1
#define HAVE_STDINT_H 1
#define HAVE_STDIO_H 1
#define HAVE_STDLIB_H 1
#define HAVE_STRINGS_H 1
#define HAVE_STRING_H 1
#define HAVE_SYS_STAT_H 1
#define HAVE_SYS_TYPES_H 1
#define HAVE_UCP_API_UCP_H 1
#define HAVE_UCS_GET_SYSTEM_ID 1
#define HAVE_UCS_SYS_UID_H 1
#define HAVE_UCX 1
#define HAVE_UNISTD_H 1
#define HAVE_WCHAR_H 1
#define LT_OBJDIR ".libs/"
#define NVALGRIND 1
#define PACKAGE "ucc"
#define PACKAGE_BUGREPORT ""
#define PACKAGE_NAME "ucc"
#define PACKAGE_STRING "ucc 1.4"
#define PACKAGE_TARNAME "ucc"
#define PACKAGE_URL ""
#define PACKAGE_VERSION "1.4"
#define SIZEOF_CUDOUBLECOMPLEX 0
#define SIZEOF_CUFLOATCOMPLEX 0
#define SIZEOF_DOUBLE 8
#define SIZEOF_DOUBLE__COMPLEX 16
#define SIZEOF_FLOAT 4
#define SIZEOF_FLOAT__COMPLEX 8
#define SIZEOF_LONG_DOUBLE 16
#define SIZEOF_LONG_DOUBLE__COMPLEX 32
#define STDC_HEADERS 1
#define UCC_BIG_ENDIAN 0
#define UCC_CONFIGURE_FLAGS "--prefix=/usr --with-ucx=/usr --with-rocm=/opt/rocm --with-rocm-arch=--offload-arch=gfx1011 --with-tls=ucp"
#define UCC_MODULE_SUBDIR "ucc"
#define UCS_HAVE_CONFIG_GLOBAL_LIST_ENTRY_FLAGS 1
#define UCS_HAVE_MPOOL_PARAMS 1
#define UCS_HAVE_PARSER_CONFIG_DOC 1
#define UCS_HAVE_PARSER_SET_VALUE_TABLE_PREFIX 1
#define UCS_HAVE_RCACHE_MERGE_CB 1
#define UCS_HAVE_RCACHE_REGION_ALIGNMENT 1
#define UCS_MAX_LOG_LEVEL UCS_LOG_LEVEL_DEBUG
#define VERSION "1.4"
### CONFIG ###
UCC_CLS=basic
UCC_LOG_LEVEL=WARN
UCC_COLL_TRACE=WARN
UCC_PROFILE_MODE=
UCC_PROFILE_FILE=ucc_%h_%p.prof
UCC_PROFILE_LOG_SIZE=4M
UCC_CONFIG_FILE=auto
UCC_ESTIMATED_NUM_EPS=0
UCC_LOCK_FREE_PROGRESS_Q=0
UCC_ESTIMATED_NUM_PPN=0
UCC_TEAM_IDS_POOL_SIZE=32
UCC_INTERNAL_OOB=1
UCC_THROTTLE_PROGRESS=1000
UCC_CL_BASIC_LOG_LEVEL=WARN
UCC_CL_BASIC_USE_TUNING=y
UCC_CL_BASIC_MIN_TEAM_SIZE=auto
UCC_CL_BASIC_TLS=all
UCC_CL_BASIC_TUNE=
UCC_CL_HIER_LOG_LEVEL=WARN
UCC_CL_HIER_USE_TUNING=y
UCC_CL_HIER_MIN_TEAM_SIZE=auto
UCC_CL_HIER_TLS=all
UCC_CL_HIER_NODE_SBGP_TLS=ucp
UCC_CL_HIER_NODE_LEADERS_SBGP_TLS=ucp
UCC_CL_HIER_NET_SBGP_TLS=ucp
UCC_CL_HIER_FULL_SBGP_TLS=ucp
UCC_CL_HIER_ALLTOALLV_SPLIT_NODE_THRESH=0
UCC_CL_HIER_ALLREDUCE_SPLIT_RAIL_PIPELINE=n
UCC_CL_HIER_ALLREDUCE_RAB_PIPELINE=n
UCC_CL_HIER_BCAST_2STEP_PIPELINE=n
UCC_CL_HIER_REDUCE_2STEP_PIPELINE=n
UCC_CL_HIER_TUNE=
UCC_TL_UCP_LOG_LEVEL=WARN
UCC_TL_UCP_USE_TUNING=y
UCC_TL_UCP_MIN_TEAM_SIZE=auto
UCC_TL_UCP_ALLTOALL_PAIRWISE_NUM_POSTS=auto
UCC_TL_UCP_ALLTOALLV_PAIRWISE_NUM_POSTS=auto
UCC_TL_UCP_ALLTOALLV_HYBRID_NUM_SCRATCH_SENDS=1
UCC_TL_UCP_ALLTOALLV_HYBRID_NUM_SCRATCH_RECVS=3
UCC_TL_UCP_ALLTOALLV_HYBRID_PAIRWISE_NUM_POSTS=3
UCC_TL_UCP_ALLTOALLV_HYBRID_BUFF_SIZE=256K
UCC_TL_UCP_ALLTOALLV_HYBRID_CHUNK_BYTE_LIMIT=12K
UCC_TL_UCP_KN_RADIX=0
UCC_TL_UCP_BARRIER_KN_RADIX=8
UCC_TL_UCP_FANIN_KN_RADIX=4
UCC_TL_UCP_FANOUT_KN_RADIX=4
UCC_TL_UCP_ALLREDUCE_KN_RADIX=auto
UCC_TL_UCP_ALLREDUCE_SLIDING_WIN_BUF_SIZE=64K
UCC_TL_UCP_ALLREDUCE_SLIDING_WIN_PUT_WINDOW_SIZE=0
UCC_TL_UCP_ALLREDUCE_SLIDING_WIN_NUM_GET_BUFS=0
UCC_TL_UCP_ALLREDUCE_SRA_KN_RADIX=auto
UCC_TL_UCP_ALLREDUCE_SRA_KN_PIPELINE=auto
UCC_TL_UCP_REDUCE_SCATTER_KN_RADIX=4
UCC_TL_UCP_ALLGATHER_KN_RADIX=4
UCC_TL_UCP_BCAST_KN_RADIX=4
UCC_TL_UCP_BCAST_SAG_KN_RADIX=auto
UCC_TL_UCP_REDUCE_KN_RADIX=4
UCC_TL_UCP_GATHER_KN_RADIX=4
UCC_TL_UCP_GATHERV_LINEAR_NUM_POSTS=0
UCC_TL_UCP_SCATTER_KN_RADIX=4
UCC_TL_UCP_SCATTER_KN_ENABLE_RECV_ZCOPY=auto
UCC_TL_UCP_SCATTERV_LINEAR_NUM_POSTS=16
UCC_TL_UCP_REDUCE_AVG_PRE_OP=y
UCC_TL_UCP_REDUCE_SCATTER_RING_BIDIRECTIONAL=y
UCC_TL_UCP_REDUCE_SCATTERV_RING_BIDIRECTIONAL=y
UCC_TL_UCP_USE_TOPO=try
UCC_TL_UCP_RANKS_REORDERING=y
UCC_TL_UCP_TUNE=
UCC_TL_UCP_PRECONNECT=0
UCC_TL_UCP_NPOLLS=10
UCC_TL_UCP_OOB_NPOLLS=20
UCC_TL_UCP_PRE_REG_MEM=0
UCC_TL_UCP_SERVICE_WORKER=n
UCC_TL_UCP_SERVICE_THROTTLING_THRESH=100
UCC_MC_CPU_LOG_LEVEL=WARN
UCC_MC_CPU_MPOOL_ELEM_SIZE=1M
UCC_MC_CPU_MPOOL_MAX_ELEMS=8
UCC_MC_ROCM_LOG_LEVEL=WARN
UCC_MC_ROCM_MPOOL_ELEM_SIZE=1M
UCC_MC_ROCM_MPOOL_MAX_ELEMS=8
UCC_EC_CPU_LOG_LEVEL=WARN
UCC_EC_ROCM_LOG_LEVEL=WARN
UCC_EC_ROCM_EXEC_NUM_WORKERS=1
UCC_EC_ROCM_EXEC_NUM_THREADS=512
UCC_EC_ROCM_EXEC_MAX_TASKS=128
UCC_EC_ROCM_EXEC_NUM_STREAMS=8
UCC_EC_ROCM_REDUCE_NUM_BLOCKS=auto
UCC_EC_ROCM_REDUCE_HOST_LIMIT=256
UCC_EC_ROCM_COPY_HOST_LIMIT=0
### SCORES ###
Default CLs scores: basic=10 hier=50
Default TLs scores: ucp=10