Skip to content

Commit

Permalink
added some more compiler options
Browse files (browse the repository at this point in the history)
  • Loading branch information
alheinecke committed Apr 10, 2023
1 parent d6257b0 commit 485cb48
Show file tree
Hide file tree
Showing 3 changed files with 59 additions and 9 deletions.
3 changes: 2 additions & 1 deletion level0/readbw/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,8 @@ CFLAGS = -O2 -fopenmp -DNTIMES=100 -DSTREAM_ARRAY_SIZE=64000000 -mcpu=power8 -fs
#CFLAGS = -O2 -fopenmp -DNTIMES=100 -DSTREAM_ARRAY_SIZE=64000000 -mavx2 -fstrict-aliasing
#CFLAGS = -O2 -fopenmp -DNTIMES=100 -DSTREAM_ARRAY_SIZE=64000000 -msse3 -fstrict-aliasing
CC = icc
CFLAGS = -O3 -xCOMMON-AVX512 -qopenmp -DNTIMES=1000 -DSTREAM_ARRAY_SIZE=100000000 -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-prefetch-distance=64,8 -qopenmp-link=static
CFLAGS = -O3 -xCOMMON-AVX512 -qopenmp -DNTIMES=10000 -DSTREAM_ARRAY_SIZE=199808000 -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-prefetch-distance=64,8 -qopenmp-link=static
CFLAGS = -O3 -xCOMMON-AVX512 -qopenmp -DNTIMES=10000 -DSTREAM_ARRAY_SIZE=199808000 -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-prefetch=5 -qopt-prefetch-distance=448,32 -qopenmp-link=static

all: readbw.exe

Expand Down
64 changes: 56 additions & 8 deletions level0/readbw/readbw.c
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@
#define USE_CORE_PERF_COUNTERS
#endif

#if 0
#include <immintrin.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#include <sys/time.h>
Expand All @@ -50,11 +53,11 @@

#ifdef NTIMES
#if NTIMES<=1
# define NTIMES 1000
# define NTIMES 10000
#endif
#endif
#ifndef NTIMES
# define NTIMES 1000
# define NTIMES 10000
#endif

# ifndef MIN
Expand All @@ -70,6 +73,10 @@ inline double sec(struct timeval start, struct timeval end) {

int main(int argc, char* argv[]) {
double* l_data;
#if 0
double* l_data2;
double* l_data3;
#endif
size_t l_n = 0;
size_t l_i = 0;
double* l_times;
Expand Down Expand Up @@ -103,15 +110,30 @@ int main(int argc, char* argv[]) {
#endif

l_sum = ((l_sum*l_sum) + l_sum)/2;
posix_memalign((void**)&l_data, 4096, ((size_t)STREAM_ARRAY_SIZE)*sizeof(double));
#if 0
l_sum *= 2;
#endif
posix_memalign((void**)&l_data, 2097152, ((size_t)STREAM_ARRAY_SIZE)*sizeof(double));
#if 0
posix_memalign((void**)&l_data2, 2097152, ((size_t)STREAM_ARRAY_SIZE)*sizeof(double));
posix_memalign((void**)&l_data3, 2097152, ((size_t)STREAM_ARRAY_SIZE)*sizeof(double));
#endif
l_times = (double*)malloc(sizeof(double)*NTIMES);

#if 1
printf("READ BW Test Size MiB: %f\n", (l_size/(1024.0*1024.0)));

#else
printf("READ BW Test Size MiB: %f\n", (l_size*2/(1024.0*1024.0)));
#endif

// init data
#pragma omp parallel for
for ( l_n = 0; l_n < STREAM_ARRAY_SIZE; l_n++ ) {
l_data[l_n] = (double)l_n;
l_data[l_n] = (float)l_n;
#if 0
l_data2[l_n] = (float)l_n;
l_data3[l_n] = (float)l_n;
#endif
}

#ifdef USE_UNCORE_PERF_COUNTERS
Expand All @@ -128,11 +150,31 @@ int main(int argc, char* argv[]) {
#pragma omp parallel
{
double l_res = 0.0;
#pragma omp for
#if 0
double l_res2 = 0.0;
__m512d l_acc = _mm512_setzero_pd();
__m512d l_acc2 = _mm512_setzero_pd();
__m512d l_acc3 = _mm512_setzero_pd();
#endif
#pragma omp for nowait
for ( l_n = 0; l_n < STREAM_ARRAY_SIZE; l_n++ ) {
l_res += l_data[l_n];
#if 0
l_acc = _mm512_add_pd( l_acc, _mm512_load_pd( l_data + l_n ));
l_acc2 = _mm512_add_pd( l_acc2, _mm512_load_pd( l_data2 + l_n ));
l_acc3 = _mm512_add_pd( l_acc3, _mm512_load_pd( l_data3 + l_n ));
_mm_prefetch( l_data + ((l_n + 2048) % STREAM_ARRAY_SIZE), _MM_HINT_T1 );
#endif
l_res += l_data[l_n];
#if 0
l_res2 += l_data2[l_n];
#endif
}

#if 0
l_res += l_res2;
l_res = _mm512_reduce_add_pd( l_acc );
l_res += _mm512_reduce_add_pd( l_acc2 );
l_res += _mm512_reduce_add_pd( l_acc3 );
#endif
#pragma omp atomic
l_result += l_res;
}
Expand Down Expand Up @@ -162,9 +204,15 @@ int main(int argc, char* argv[]) {
l_avgTime /= (double)NTIMES;

// output
#if 1
printf("AVG GiB/s (calculated): %f\n", (l_size/(1024.0*1024.0*1024.0))/l_avgTime);
printf("MAX GiB/s (calculated): %f\n", (l_size/(1024.0*1024.0*1024.0))/l_minTime);
printf("MIN GiB/s (calculated): %f\n", (l_size/(1024.0*1024.0*1024.0))/l_maxTime);
#else
printf("AVG GiB/s (calculated): %f\n", (l_size*2.0/(1024.0*1024.0*1024.0))/l_avgTime);
printf("MAX GiB/s (calculated): %f\n", (l_size*2.0/(1024.0*1024.0*1024.0))/l_minTime);
printf("MIN GiB/s (calculated): %f\n", (l_size*2.0/(1024.0*1024.0*1024.0))/l_maxTime);
#endif
#ifdef USE_UNCORE_PERF_COUNTERS
#ifdef USE_DRAM_COUNTERS
get_cas_ddr_bw_uncore_ctrs( &s, l_maxTime, &bw_min );
Expand Down
1 change: 1 addition & 0 deletions level0/stream/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ CC=icc
#CFLAGS=-O2 -xCOMMON-AVX512 -qopenmp -DBENCH_AVX512 -DNTIMES=1000 -DSTREAM_ARRAY_SIZE=73740288 -fstrict-aliasing -opt-streaming-cache-evict=0 -opt-streaming-stores always -opt-prefetch-distance=64,8 -static-intel
#CFLAGS=-O2 -xCOMMON-AVX512 -qopenmp -DBENCH_AVX512 -DNTIMES=10000 -DSTREAM_ARRAY_SIZE=2752512 -fstrict-aliasing -opt-streaming-cache-evict=0 -opt-streaming-stores never -opt-prefetch-distance=64,8 -static-intel
CFLAGS=-O2 -xCOMMON-AVX512 -qopenmp -DBENCH_AVX512 -DNTIMES=1000 -DSTREAM_ARRAY_SIZE=64512000 -fstrict-aliasing -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-prefetch-distance=64,8 -static-intel
CFLAGS=-O2 -xCOMMON-AVX512 -qopenmp -DBENCH_AVX512 -DNTIMES=1000 -DSTREAM_ARRAY_SIZE=64512000 -fstrict-aliasing -qopt-streaming-cache-evict=0 -qopt-streaming-stores always -qopt-prefetch=5 -qopt-prefetch-distance=448,32 -static-intel
EXE=stream_avx512
endif

Expand Down

0 comments on commit 485cb48

Please sign in to comment.