
Commit

Merge pull request #667 from XJDKC/dev
Fix training bugs
nudles authored Apr 8, 2020
2 parents faaa6e7 + 9e77f54 commit f3f6fe0
Showing 6 changed files with 114 additions and 32 deletions.
4 changes: 2 additions & 2 deletions src/core/device/device.cc
@@ -50,8 +50,6 @@ void Device::RunGraph(bool serial) {
bool previous_state = graph_enabled_;
graph_enabled_ = false;

// graph_->Debug();

if (serial) {
// sequential execution
graph_->RunInSerial();
@@ -60,6 +58,8 @@
graph_->RunGraph();
}

// graph_->Debug();

graph_enabled_ = previous_state;
}

105 changes: 85 additions & 20 deletions src/core/scheduler/scheduler.cc
@@ -18,7 +18,10 @@

#include "singa/core/scheduler.h"

#include <algorithm>
#include <functional>
#include <iomanip>
#include <sstream>
#include <thread>
#include <unordered_set>

@@ -99,39 +102,97 @@ void Graph::Reset() {
}

void Graph::Debug() {
if (dirty_) Analysis();

size_t max_in_num = 0, max_out_num = 0, max_next_num = 0, max_free_num = 0;
for (auto &it : nodes_) {
max_in_num = std::max(max_in_num, it->in_edges_.size());
max_out_num = std::max(max_out_num, it->out_edges_.size());
}

for (auto &it : next_nodes_) {
max_next_num = std::max(max_next_num, it.size());
}

for (auto &it : free_blocks_) {
max_free_num = std::max(max_free_num, it.size());
}

int w = 2;
std::stringstream ss;
ss << "begin nodes:[";
for (size_t i = 0; i < begin_nodes_.size(); ++i) {
ss << begin_nodes_[i]->id_;
}
ss << "]" << std::endl;

size_t size = 0;
for (size_t i = 0; i < nodes_.size(); ++i) {
printf("OP[%2lu]: ", i);
printf("Inputs: ");
ss << "OP[" << std::setw(w) << i;
auto node = nodes_[i];
for (size_t j = 0; j < node->in_edges_.size(); ++j) {
printf("%d\t", blocks_[node->in_edges_[j]->blk_]->id_);

ss << "] Inputs:[";
size = node->in_edges_.size();
for (size_t j = 0; j < max_in_num; ++j) {
if (j < size)
ss << std::setw(w) << blocks_[node->in_edges_[j]->blk_]->id_ << " ";
else
ss << std::setw(w + 1) << " ";
}
for (size_t j = node->in_edges_.size(); j < 3; ++j) {
printf("\t");

ss << "] Outputs:[";
size = node->out_edges_.size();
for (size_t j = 0; j < max_out_num; ++j) {
if (j < size)
ss << std::setw(w) << blocks_[node->out_edges_[j]->blk_]->id_ << " ";
else
ss << std::setw(w + 1) << " ";
}
printf("Outputs: ");
for (size_t j = 0; j < node->out_edges_.size(); ++j) {
printf("%d\t", blocks_[node->out_edges_[j]->blk_]->id_);

ss << "] Next nodes:[";
size = next_nodes_[i].size();
for (size_t j = 0; j < max_next_num; ++j) {
if (j < size)
ss << std::setw(w) << next_nodes_[i][j]->id_ << " ";
else
ss << std::setw(w + 1) << " ";
}

ss << "] Free blocks:[";
size = free_blocks_[i].size();
for (size_t j = 0; j < max_free_num; ++j) {
if (j < size)
ss << std::setw(w) << blocks_[free_blocks_[i][j]]->id_ << " ";
else
ss << std::setw(w + 1) << " ";
}
printf("\n");
ss << "]" << std::endl;
}

std::vector<BlkInfo *> blkInfos;
blkInfos.resize(blocks_.size());

for (auto it : blocks_) {
auto blkInfo = it.second;
printf("Block[%2d]: addr[%p] graph_ref[%d] ref_count[%d] ", blkInfo->id_,
blkInfo->blk_, blkInfo->graph_ref_, it.first->ref_count());
blkInfos[it.second->id_] = it.second;
}

for (auto it : blkInfos) {
auto blkInfo = it;
ss << "Block[" << std::setw(w) << blkInfo->id_ << "] addr[" << std::setw(w)
<< blkInfo->blk_ << "] graph_ref[" << std::setw(w) << blkInfo->graph_ref_
<< "] ref_count[" << std::setw(w) << blkInfo->blk_->ref_count() << "] ";
switch (blkInfo->type_) {
case BlockType::kInput:
printf("type[input] ");
ss << "type[input] ";
break;
case BlockType::kParam:
printf("type[param] ");
ss << "type[param] ";
break;
case BlockType::kInter:
printf("type[inter] ");
ss << "type[inter] ";
break;
case BlockType::kEnd:
printf("type[_end_] ");
ss << "type[_end_] ";
break;
default:
break;
@@ -140,14 +201,16 @@ void Graph::Debug() {
if (blkInfo->write_node_) {
id = blkInfo->write_node_->id_;
}
printf(" write_node[%2d]", id);
ss << " write_node[" << std::setw(w) << id << "]";
id = -1;
if (blkInfo->last_node_) {
id = blkInfo->last_node_->id_;
}
printf(" last_node[%2d]", id);
printf("\n");
ss << " last_node[" << std::setw(w) << id << "]" << std::endl;
;
}

printf("%s", ss.str().c_str());
}

void Graph::RunGraph() {
@@ -358,6 +421,8 @@ void Graph::Analysis() {
}

dirty_ = false;

// Debug();
}

void Graph::FreeLoop() {
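
The rewritten Graph::Debug() above replaces the old per-line printf calls with a single std::stringstream: it first scans the graph for the widest input, output, next-node and free-block lists, then pads every column with std::setw so the dump stays aligned and is printed in one shot. A minimal standalone sketch of that padding idea (the ids and widths below are made up for illustration, not taken from the commit):

#include <algorithm>
#include <iomanip>
#include <iostream>
#include <sstream>
#include <vector>

int main() {
  // Two "nodes" with different numbers of input ids; pad the shorter row so
  // the columns stay aligned, mirroring the padding logic in Graph::Debug().
  std::vector<std::vector<int>> inputs = {{3, 12, 7}, {5}};
  size_t max_in = 0;
  for (auto &in : inputs) max_in = std::max(max_in, in.size());

  int w = 2;
  std::stringstream ss;
  for (size_t i = 0; i < inputs.size(); ++i) {
    ss << "OP[" << std::setw(w) << i << "] Inputs:[";
    for (size_t j = 0; j < max_in; ++j) {
      if (j < inputs[i].size())
        ss << std::setw(w) << inputs[i][j] << " ";
      else
        ss << std::setw(w + 1) << " ";  // blank cell keeps the column width
    }
    ss << "]" << std::endl;
  }
  std::cout << ss.str();  // emit the whole table at once
  return 0;
}
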
15 changes: 11 additions & 4 deletions src/core/tensor/tensor.cc
@@ -161,6 +161,9 @@ Tensor Resize(const Tensor &in, const Shape &shape) {

// return new tensor
Tensor Tensor::AsType(const DataType type) {
CHECK(block() && block()->initialized() == true)
<< "the data of the tensor needs be initialized before casting to "
"another type";
if (data_type_ != type) {
Tensor &thisRef = *this;
Tensor ret(shape_, device_, type);
@@ -1466,16 +1469,20 @@ void Mult(const Tensor &A, const Tensor &B, Tensor *out) {
template <typename SType>
void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
Tensor *C) {
Tensor fakeC;
vector<Block *> read_blocks = {A.block(), B.block()};
// if (beta) read_blocks.push_back(C->block());
if (beta) {
fakeC = *C;
read_blocks.push_back(C->block());
}
if (B.nDim() == 1u) {
CHECK_EQ(A.shape().size(), 2u);
TYPE_LANG_SWITCH(A.data_type(), DType, A.device()->lang(), Lang, {
auto a = TypeCast<SType, DType>(alpha);
auto b = TypeCast<SType, DType>(beta);
Tensor &CRef = *C;
C->device()->Exec(
[a, A, b, B, CRef](Context *ctx) mutable {
[a, A, b, B, CRef, fakeC](Context *ctx) mutable {
GEMV<DType, Lang>(a, A, B, b, &CRef, ctx);
},
read_blocks, {C->block()});
@@ -1488,7 +1495,7 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,
auto b = TypeCast<SType, DType>(beta);
Tensor &CRef = *C;
C->device()->Exec(
[a, A, b, B, CRef](Context *ctx) mutable {
[a, A, b, B, CRef, fakeC](Context *ctx) mutable {
GEMM<DType, Lang>(a, A, B, b, &CRef, ctx);
},
read_blocks, {C->block()});
Expand Down Expand Up @@ -1523,7 +1530,7 @@ void Mult(const SType alpha, const Tensor &A, const Tensor &B, const SType beta,

Tensor &CRef = *C;
C->device()->Exec(
[a, A_tmp, b, B_tmp, CRef](Context *ctx) mutable {
[a, A_tmp, b, B_tmp, CRef, fakeC](Context *ctx) mutable {
GEMMBatched<DType, Lang>(a, A_tmp, B_tmp, b, &CRef, ctx);
},
read_blocks, {C->block()});
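
In the Mult() changes above, when beta is non-zero the kernel reads C's current values, so C->block() is added to read_blocks and an extra copy of C (fakeC) is captured by value in each lambda. Assuming SINGA tensors share their underlying Block and that copying a tensor raises the block's reference count, the extra capture keeps that block referenced until the deferred operation actually runs. A rough sketch of that capture-by-value pattern, using hypothetical stand-in types rather than the library's API:

#include <functional>
#include <memory>
#include <vector>

// Hypothetical stand-ins: a Block is reference-counted storage and a Tensor
// holds a shared pointer to it, as assumed in the lead-in.
struct Block { std::vector<float> data; };
struct Tensor { std::shared_ptr<Block> blk; };

// Capturing the tensor by value inside the deferred task keeps its Block
// alive even if the caller drops its own reference before the task runs.
std::function<void()> MakeDeferredScale(float beta, Tensor C) {
  return [beta, C]() mutable {
    for (auto &v : C.blk->data) v *= beta;  // reads the captured block
  };
}

int main() {
  Tensor C{std::make_shared<Block>(Block{{1.f, 2.f, 3.f}})};
  auto task = MakeDeferredScale(0.5f, C);  // the task now co-owns the block
  C.blk.reset();                           // caller releases its reference
  task();                                  // still safe: the capture owns it
  return 0;
}
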
10 changes: 6 additions & 4 deletions test/singa/test_cross_entropy.cc
@@ -42,8 +42,10 @@ class TestSoftmaxCrossEntropy : public ::testing::Test {

TEST_F(TestSoftmaxCrossEntropy, CppForward) {
p.CopyDataFromHostPtr(pdat, 8);
t.AsType(singa::kInt);
EXPECT_TRUE(p.block()->initialized());
t.CopyDataFromHostPtr(tdat, 2);
t.AsType(singa::kInt);


singa::SoftmaxCrossEntropy cross_entropy;
const Tensor& loss = cross_entropy.Forward(singa::kEval, p, t);
@@ -56,8 +58,8 @@ TEST_F(TestSoftmaxCrossEntropy, CppForward) {

TEST_F(TestSoftmaxCrossEntropy, CppForwardAryTarget) {
p.CopyDataFromHostPtr(pdat, 8);
ta.AsType(singa::kInt);
ta.CopyDataFromHostPtr(tary, 8);
ta.AsType(singa::kInt);

singa::SoftmaxCrossEntropy cross_entropy;
const Tensor& loss = cross_entropy.Forward(singa::kEval, p, ta);
@@ -70,8 +72,8 @@ TEST_F(TestSoftmaxCrossEntropy, CppForwardAryTarget) {

TEST_F(TestSoftmaxCrossEntropy, CppBackward) {
p.CopyDataFromHostPtr(pdat, 8);
t.AsType(singa::kInt);
t.CopyDataFromHostPtr(tdat, 2);
t.AsType(singa::kInt);

singa::SoftmaxCrossEntropy cross_entropy;
cross_entropy.Forward(singa::kTrain, p, t);
@@ -90,8 +92,8 @@ TEST_F(TestSoftmaxCrossEntropy, CppBackward) {

TEST_F(TestSoftmaxCrossEntropy, CppBackwardAryTarget) {
p.CopyDataFromHostPtr(pdat, 8);
ta.AsType(singa::kInt);
ta.CopyDataFromHostPtr(tary, 8);
ta.AsType(singa::kInt);

singa::SoftmaxCrossEntropy cross_entropy;
cross_entropy.Forward(singa::kTrain, p, ta);
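
The four test fixes above are all the same reordering: the host data is now copied into the tensor before AsType() is called, because AsType() (see the tensor.cc hunk) now CHECKs that the block is initialized before casting. A minimal sketch of the corrected order, assuming only the Tensor calls already used in these tests:

#include "singa/core/tensor.h"

// Hypothetical helper mirroring the test setup.
void PrepareIntTarget(singa::Tensor &t, const float *host_vals, size_t n) {
  t.CopyDataFromHostPtr(host_vals, n);  // block becomes initialized here
  t.AsType(singa::kInt);                // safe: AsType sees initialized data
}
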
10 changes: 9 additions & 1 deletion test/singa/test_platform.cc
@@ -28,8 +28,10 @@ using singa::Platform;
TEST(Platform, CreateMultDevice) {
int n = Platform::GetNumGPUs();
auto devs = Platform::CreateCudaGPUs(n);
for (int i = 0; i < devs.size(); i++) {
for (size_t i = 0; i < devs.size(); i++) {
auto b = devs[i]->NewBlock(512 + 512 * (2 - i));
// for lazy allocation
b->mutable_data();
EXPECT_EQ(512 + 512 * (2 - i), devs[i]->GetAllocatedMem());
devs[i]->FreeBlock(b);
}
@@ -54,6 +56,8 @@ TEST(Platform, CreateDevice) {
size_t size[] = {128, 256, 3, 24};
{
auto ptr = dev->NewBlock(size[0]);
// for lazy allocation
ptr->mutable_data();
auto allocated = dev->GetAllocatedMem();
EXPECT_LE(size[0], allocated);
dev->FreeBlock(ptr);
@@ -63,9 +67,13 @@
auto ptr0 = dev->NewBlock(size[0]);
auto ptr1 = dev->NewBlock(size[1]);
auto ptr2 = dev->NewBlock(size[2]);
ptr0->mutable_data();
ptr1->mutable_data();
ptr2->mutable_data();
auto allocated = dev->GetAllocatedMem();
EXPECT_LE(size[0] + size[1] + size[2], allocated);
auto ptr3 = dev->NewBlock(size[3]);
ptr3->mutable_data();
allocated = dev->GetAllocatedMem();
EXPECT_LE(size[0] + size[1] + size[2] + size[3], allocated);
dev->FreeBlock(ptr0);
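
Both platform tests now touch each block with mutable_data() before checking GetAllocatedMem(); per the added comments, allocation is lazy, so NewBlock() alone no longer commits device memory. A minimal sketch of that pattern, assuming the Platform/Device API exercised above (the header path is a guess):

#include "singa/core/device.h"  // assumed location of Platform/Device

void CheckLazyAllocation() {
  if (singa::Platform::GetNumGPUs() == 0) return;  // no GPU on this machine
  auto devs = singa::Platform::CreateCudaGPUs(1);
  auto dev = devs[0];
  auto blk = dev->NewBlock(1024);         // only records the request
  blk->mutable_data();                    // first touch forces the allocation
  size_t used = dev->GetAllocatedMem();   // now reflects the 1024-byte block
  dev->FreeBlock(blk);
  (void)used;
}
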
2 changes: 1 addition & 1 deletion test/singa/test_snapshot.cc
@@ -79,8 +79,8 @@ TEST(Snapshot, ReadIntTest) {
singa::Snapshot int_snapshot_write(prefix + ".int",
singa::Snapshot::kWrite);
singa::Tensor int_param(singa::Shape{4});
int_param.AsType(singa::kInt);
int_param.CopyDataFromHostPtr(int_data, 4);
int_param.AsType(singa::kInt);
int_snapshot_write.Write("IntParam", int_param);
}

