-
Notifications
You must be signed in to change notification settings - Fork 98
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Vector softmax #1172
Vector softmax #1172
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,121 @@ | ||
# | ||
# This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||
# See https://llvm.org/LICENSE.txt for license information. | ||
# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||
# | ||
# (c) Copyright 2023 AMD Inc. | ||
|
||
import sys | ||
|
||
from aie.dialects.aie import * | ||
from aie.dialects.aiex import * | ||
from aie.dialects.scf import * | ||
from aie.extras.context import mlir_mod_ctx | ||
|
||
|
||
def my_eltwise_add():
    """Build the multi-core bf16 ``exp_bf16_vector`` design and print its MLIR.

    Despite the historical name, no addition is performed: every compute core
    calls the external ``exp_bf16_vector`` kernel on one tile at a time.

    Data path declared here:
      * ``inA``: shim tile -> memory tile, linked to one per-core fifo
        (``memA<i>``) feeding each compute tile.
      * ``memC<i>``: compute tile -> memory tile, linked into ``outC``, which
        goes back to the shim tile.

    Takes no arguments and returns nothing; the generated module is printed to
    stdout.

    Raises:
        ValueError: if the problem size is not a whole number of tiles per core.
    """
    word_size_in = 2  # bytes per bf16 element
    N = 65536  # *1024
    N_in_bytes = N * word_size_in

    # The host-side DMA descriptors below are expressed in 32-bit words.
    A_sz_in_i32s = N_in_bytes // 4
    C_sz_in_i32s = N_in_bytes // 4

    # Tile sizes
    n = 1024
    N_div_n = N // n

    n_cores = 4
    tiles = N_div_n // n_cores
    buffer_depth = 2

    # The floor divisions above would silently drop a trailing partial tile,
    # so reject sizes that do not split evenly across tiles and cores.
    if N % (n * n_cores) != 0:
        raise ValueError(
            f"N ({N}) must be a multiple of n * n_cores ({n} * {n_cores})"
        )

    with mlir_mod_ctx() as ctx:

        @device(AIEDevice.ipu)
        def device_body():
            memRef_ty = T.memref(n, T.bf16())

            # Type used in the tile memory
            memRef_A_ty = T.memref(n, T.bf16())
            memRef_C_ty = T.memref(n, T.bf16())

            # Type used in the memory tile which aggregates across the 4 cores
            memRef_A_MT_ty = T.memref(n * n_cores, T.bf16())
            memRef_C_MT_ty = T.memref(n * n_cores, T.bf16())

            # AIE Core Function declarations
            exp_bf16_vector = external_func(
                "exp_bf16_vector", inputs=[memRef_ty, memRef_ty]
            )

            # Tile declarations
            ShimTile = tile(0, 0)

            MemTile = tile(0, 1)
            cores = [tile(0, 2 + i) for i in range(n_cores)]

            inA_fifo_names = [f"memA{i}" for i in range(n_cores)]
            outC_fifo_names = [f"memC{i}" for i in range(n_cores)]

            inA_fifos = {}
            outC_fifos = {}

            # AIE-array data movement with object fifos
            # Input A
            inA = object_fifo("inA", ShimTile, MemTile, buffer_depth, memRef_A_MT_ty)
            for i in range(n_cores):
                inA_fifos[inA_fifo_names[i]] = object_fifo(
                    inA_fifo_names[i], MemTile, cores[i], buffer_depth, memRef_A_ty
                )
            object_fifo_link(inA, inA_fifo_names)

            # Output C
            for i in range(n_cores):
                outC_fifos[outC_fifo_names[i]] = object_fifo(
                    outC_fifo_names[i], cores[i], MemTile, buffer_depth, memRef_C_ty
                )
            outC = object_fifo("outC", MemTile, ShimTile, buffer_depth, memRef_C_MT_ty)
            object_fifo_link(outC_fifo_names[0:n_cores], outC)

            # Set up compute tiles
            for i in range(n_cores):
                # Compute tile i
                @core(cores[i], "kernels.a")
                def core_body():
                    # Outer loop: very large trip count so the core keeps
                    # servicing tiles; inner loop: this core's share of tiles.
                    for _ in for_(0xFFFFFFFF):
                        for _ in for_(tiles):
                            elem_out = outC_fifos[outC_fifo_names[i]].acquire(
                                ObjectFifoPort.Produce, 1
                            )
                            elem_in_a = inA_fifos[inA_fifo_names[i]].acquire(
                                ObjectFifoPort.Consume, 1
                            )

                            call(exp_bf16_vector, [elem_in_a, elem_out])

                            inA_fifos[inA_fifo_names[i]].release(
                                ObjectFifoPort.Consume, 1
                            )
                            outC_fifos[outC_fifo_names[i]].release(
                                ObjectFifoPort.Produce, 1
                            )
                            yield_([])
                        yield_([])

            # To/from AIE-array data movement
            # NOTE(review): declared as N i32 elements, while the transfers
            # below only move A_sz_in_i32s / C_sz_in_i32s (= N / 2) words --
            # confirm the intended host buffer type.
            tensor_ty = T.memref(N, T.i32())

            @FuncOp.from_py_func(tensor_ty, tensor_ty)
            def sequence(A, C):
                ipu_dma_memcpy_nd(
                    metadata="outC", bd_id=0, mem=C, sizes=[1, 1, 1, C_sz_in_i32s]
                )
                ipu_dma_memcpy_nd(
                    metadata="inA", bd_id=1, mem=A, sizes=[1, 1, 1, A_sz_in_i32s]
                )
                ipu_sync(column=0, row=0, direction=0, channel=0)

    print(ctx.module)
|
||
|
||
my_eltwise_add() |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,34 @@ | ||
module { | ||
// dut: softmax over one bf16 tile, computed as exp(x - max) / sum(exp(x - max)).
//   %arg0: input tile. It is also used as scratch: the second loop overwrites
//          it in place with exp(x - max).
//   %arg1: output tile, written only by the final loop.
func.func @dut(%arg0: memref<1024xbf16>, %arg1: memref<1024xbf16>) { | ||
// f32 zero: initial value of the running sum.
%cst = arith.constant 0.000000e+00 : f32 | ||
// f32 one: numerator of the reciprocal of the sum.
%cst_0 = arith.constant 1.000000e+00 : f32 | ||
// Padding value required by vector.transfer_read.
%cst_1 = arith.constant 0.000000e+00 : bf16 | ||
// 0xFF80 is the bf16 bit pattern for -infinity (sign set, exponent all ones,
// mantissa zero), i.e. the identity element for the max reduction.
%cst_2 = arith.constant dense<0xFF80> : vector<32xbf16> | ||
// Pass 1: lane-wise running maximum over the input, 32 elements per step.
%0 = affine.for %arg2 = 0 to 1024 step 32 iter_args(%arg3 = %cst_2) -> (vector<32xbf16>) { | ||
%5 = vector.transfer_read %arg0[%arg2], %cst_1 : memref<1024xbf16>, vector<32xbf16> | ||
%6 = arith.maximumf %arg3, %5 : vector<32xbf16> | ||
affine.yield %6 : vector<32xbf16> | ||
} | ||
// Collapse the 32 lane maxima into the single maximum of the tile.
%1 = vector.reduction <maximumf>, %0 : vector<32xbf16> into bf16 | ||
// Pass 2 (scalar): replace every input element with exp(x - max), in place.
affine.for %arg2 = 0 to 1024 { | ||
%5 = affine.load %arg0[%arg2] : memref<1024xbf16> | ||
%6 = arith.subf %5, %1 : bf16 | ||
%7 = math.exp %6 : bf16 | ||
affine.store %7, %arg0[%arg2] : memref<1024xbf16> | ||
} | ||
// Pass 3 (scalar): sum the exponentials, widening each term to f32 first.
%2 = affine.for %arg2 = 0 to 1024 iter_args(%arg3 = %cst) -> (f32) { | ||
%5 = affine.load %arg0[%arg2] : memref<1024xbf16> | ||
%6 = arith.extf %5 : bf16 to f32 | ||
%7 = arith.addf %arg3, %6 : f32 | ||
affine.yield %7 : f32 | ||
} | ||
// Reciprocal of the sum in f32, then narrowed to bf16 for the scaling pass.
%3 = arith.divf %cst_0, %2 : f32 | ||
%4 = arith.truncf %3 : f32 to bf16 | ||
// Pass 4 (scalar): scale each exponential by 1/sum and write it to the output.
affine.for %arg2 = 0 to 1024 { | ||
%5 = affine.load %arg0[%arg2] : memref<1024xbf16> | ||
%6 = arith.mulf %5, %4 : bf16 | ||
affine.store %6, %arg1[%arg2] : memref<1024xbf16> | ||
} | ||
return | ||
} | ||
} |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
|
@@ -26,3 +26,33 @@ extern "C" { | |||||||||
void exp_bf16_vector(bfloat16 *a_in, bfloat16 *c_out) { dut(a_in, c_out); } | ||||||||||
|
||||||||||
} // extern "C" | ||||||||||
//===- scale.cc -------------------------------------------------*- C++ -*-===// | ||||||||||
// | ||||||||||
// This file is licensed under the Apache License v2.0 with LLVM Exceptions. | ||||||||||
// See https://llvm.org/LICENSE.txt for license information. | ||||||||||
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception | ||||||||||
// | ||||||||||
// Copyright (C) 2023, Advanced Micro Devices, Inc. | ||||||||||
// | ||||||||||
//===----------------------------------------------------------------------===// | ||||||||||
|
||||||||||
#define __AIENGINE__ 2 | ||||||||||
#define NOCPP | ||||||||||
#define __AIEARCH__ 20 | ||||||||||
|
||||||||||
#include <stdint.h> | ||||||||||
#include <stdio.h> | ||||||||||
#include <stdlib.h> | ||||||||||
#include <type_traits> | ||||||||||
|
||||||||||
#include <aie_api/aie.hpp> | ||||||||||
|
||||||||||
extern void dut(bfloat16 *a_in, bfloat16 *cout); | ||||||||||
|
||||||||||
extern "C" { | ||||||||||
|
||||||||||
// exp_bf16_vector() is already defined once earlier in this translation unit
// (the one-line definition that precedes the first `} // extern "C"`).
// Defining it a second time here is a redefinition error, so this appended
// copy of the wrapper is intentionally left without a body.
Comment on lines
+54
to
+56
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||
|
||||||||||
} // extern "C" |
Original file line number | Diff line number | Diff line change | ||||||||
---|---|---|---|---|---|---|---|---|---|---|
@@ -0,0 +1,20 @@ | ||||||||||
"""Sweep problem size and tile size, rebuilding and profiling each configuration.

For every (problem size, tile size) pair the three ``<name>.orig`` templates
are specialised by literal text substitution, then ``make clean``,
``make all`` and ``make profile`` are run in the current directory.
"""
import os
import sys

SOURCES = ["bf16_softmax.mlir", "test.cpp", "aie2.py"]
PROBLEM_SIZES = [16384, 32768, 65536, 131072, 262144]
TILE_SIZES = [64, 128, 256, 512, 1024]


def specialize_source(text, tile_size, problem_size):
    """Return ``text`` with the template sizes replaced.

    Every occurrence of the literal ``1024`` becomes ``tile_size`` first, then
    every occurrence of the literal ``65536`` becomes ``problem_size`` -- the
    same two global substitutions, in the same order, that the earlier sed
    pipeline performed.
    """
    return text.replace("1024", str(tile_size)).replace("65536", str(problem_size))


def run_step(cmd):
    """Run ``cmd`` through the shell; warn on stderr and return False on failure."""
    status = os.system(cmd)
    if status != 0:
        print(f"warning: '{cmd}' exited with status {status}", file=sys.stderr)
        return False
    return True


def main():
    # Start every sweep from an empty results.csv (was: rm -f, then touch).
    with open("results.csv", "w"):
        pass

    for s in PROBLEM_SIZES:
        for i in TILE_SIZES:
            for f in SOURCES:
                with open(f"{f}.orig") as template:
                    specialised = specialize_source(template.read(), i, s)
                with open(f, "w") as out:
                    out.write(specialised)

            # A configuration that fails to build is reported and skipped
            # rather than profiled; the sweep carries on with the next one.
            for cmd in ("make clean > /dev/null", "make all", "make profile"):
                if not run_step(cmd):
                    break


if __name__ == "__main__":
    main()
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [black] reported by reviewdog 🐶
Suggested change
|
Original file line number | Diff line number | Diff line change | ||||||||||||
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
|
@@ -16,19 +16,30 @@ | |||||||||||||
#include <ctime> | ||||||||||||||
#include <fstream> | ||||||||||||||
#include <iostream> | ||||||||||||||
#include <math.h>
#include <sstream>
#include <stdfloat>
#include <string>
#include <vector>
|
||||||||||||||
#include "xrt/xrt_bo.h" | ||||||||||||||
#include "xrt/xrt_device.h" | ||||||||||||||
#include "xrt/xrt_kernel.h" | ||||||||||||||
|
||||||||||||||
constexpr bool VERIFY = true; | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
|
||||||||||||||
// Number of bf16 elements processed per run. The committed merge conflict is
// resolved to the `asplos` value: it is the literal the sweep script rewrites
// in this file, and it equals the N used by the AIE design script.
constexpr int IN_SIZE = 65536; //*1024;
// Number of elements each softmax is computed over (one kernel tile).
constexpr int TILE_SIZE = 1024;
constexpr int OUT_SIZE = IN_SIZE;

// The CPU reference walks the input in whole TILE_SIZE steps, so a partial
// trailing tile would index past the end of the buffers.
static_assert(IN_SIZE % TILE_SIZE == 0,
              "IN_SIZE must be a whole number of tiles");
|
||||||||||||||
|
@@ -88,6 +99,10 @@ int main(int argc, const char *argv[]) { | |||||||||||||
"the kernel name in the XCLBIN (for instance PP_PRE_FD)")( | ||||||||||||||
"verbosity,v", po::value<int>()->default_value(0), | ||||||||||||||
"the verbosity of the output")( | ||||||||||||||
<<<<<<< HEAD | ||||||||||||||
"profile,p", po::value<std::string>()->default_value(""),"CSV profile")( | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
======= | ||||||||||||||
>>>>>>> asplos | ||||||||||||||
"instr,i", po::value<std::string>()->required(), | ||||||||||||||
"path of file containing userspace instructions to be sent to the LX6"); | ||||||||||||||
po::variables_map vm; | ||||||||||||||
|
@@ -180,7 +195,7 @@ int main(int argc, const char *argv[]) { | |||||||||||||
|
||||||||||||||
int sticky_errors = 0; | ||||||||||||||
|
||||||||||||||
unsigned num_iter = 256; | ||||||||||||||
unsigned num_iter = 64; | ||||||||||||||
float npu_time_total = 0; | ||||||||||||||
float npu_time_min = 9999999; | ||||||||||||||
float npu_time_max = 0; | ||||||||||||||
|
@@ -215,21 +230,22 @@ int main(int argc, const char *argv[]) { | |||||||||||||
std::vector<std::bfloat16_t> RefVec(IN_SIZE); | ||||||||||||||
auto cpu_start = std::chrono::high_resolution_clock::now(); | ||||||||||||||
|
||||||||||||||
for (uint32_t t = 0; t < IN_SIZE; t += TILE_SIZE) { | ||||||||||||||
for (uint32_t t = 0; t < IN_SIZE; t+=TILE_SIZE) { | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
float running = 0.0; | ||||||||||||||
for (uint32_t i = 0; i < TILE_SIZE; i++) { | ||||||||||||||
float ez = (float)(exp(AVec[t + i])); | ||||||||||||||
float ez = (float)(exp(AVec[t+i])); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
running += ez; | ||||||||||||||
RefVec[t + i] = exp(AVec[t + i]); | ||||||||||||||
RefVec[t+i] = exp(AVec[t+i]); | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
} | ||||||||||||||
|
||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
for (uint32_t i = 0; i < TILE_SIZE; i++) { | ||||||||||||||
RefVec[t + i] /= running; | ||||||||||||||
RefVec[t+i] /= running; | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
} | ||||||||||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
auto cpu_stop = std::chrono::high_resolution_clock::now(); | ||||||||||||||
float cpu_time = std::chrono::duration_cast<std::chrono::microseconds>( | ||||||||||||||
cpu_stop - cpu_start) | ||||||||||||||
.count(); | ||||||||||||||
float cpu_time = | ||||||||||||||
std::chrono::duration_cast<std::chrono::microseconds>(cpu_stop - cpu_start) | ||||||||||||||
.count(); | ||||||||||||||
Comment on lines
+246
to
+248
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. [clang-format] reported by reviewdog 🐶
Suggested change
|
||||||||||||||
|
||||||||||||||
cpu_time_total += cpu_time; | ||||||||||||||
cpu_time_min = (cpu_time < cpu_time_min) ? cpu_time : cpu_time_min; | ||||||||||||||
|
@@ -264,6 +280,13 @@ int main(int argc, const char *argv[]) { | |||||||||||||
npu_time_min = (npu_time < npu_time_min) ? npu_time : npu_time_min; | ||||||||||||||
npu_time_max = (npu_time > npu_time_max) ? npu_time : npu_time_max; | ||||||||||||||
|
||||||||||||||
std::string profile = vm["profile"].as<std::string>(); | ||||||||||||||
if (profile.length()) { | ||||||||||||||
std::ofstream of; | ||||||||||||||
of.open(profile, std::ios::app); // Append | ||||||||||||||
of << IN_SIZE << "," << TILE_SIZE << "," << npu_time << std::endl; | ||||||||||||||
} | ||||||||||||||
|
||||||||||||||
if (VERIFY) { | ||||||||||||||
if (!errors) { | ||||||||||||||
std::cout << iter << ": pass! in " << npu_time << "us" << std::endl; | ||||||||||||||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
[black] reported by reviewdog 🐶