Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

added template for data reader to pass conduit node from driver #2473

Draft
wants to merge 3 commits into
base: develop
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -920,6 +920,7 @@ add_subdirectory(applications/CANDLE/pilot2/tools)
add_subdirectory(applications/ATOM/utils)
add_subdirectory(tests)
add_subdirectory(scripts)
add_subdirectory(core-driver)
bvanessen marked this conversation as resolved.
Show resolved Hide resolved

################################################################
# Install LBANN
Expand Down
2 changes: 1 addition & 1 deletion cmake/configure_files/LBANNConfig.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ set(LBANN_HAS_DIHYDROGEN @LBANN_HAS_DIHYDROGEN@)
set(LBANN_HAS_DISTCONV @LBANN_HAS_DISTCONV@)
set(LBANN_HAS_DOXYGEN @LBANN_HAS_DOXYGEN@)
set(LBANN_HAS_EMBEDDED_PYTHON @LBANN_HAS_EMBEDDED_PYTHON@)
set(LBANN_HAS_FFTW @LBANN_HAS_FFTW@
set(LBANN_HAS_FFTW @LBANN_HAS_FFTW@)
set(LBANN_HAS_FFTW_FLOAT @LBANN_HAS_FFTW_FLOAT@)
set(LBANN_HAS_FFTW_DOUBLE @LBANN_HAS_FFTW_DOUBLE@)
set(LBANN_HAS_GPU_FP16 @LBANN_HAS_GPU_FP16@)
Expand Down
21 changes: 17 additions & 4 deletions core-driver/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
cmake_minimum_required(VERSION 3.18.0)
project(my_lbann_test C CXX)
cmake_minimum_required(VERSION 3.21.0)
project(my_lbann_test CXX)
find_package(LBANN 0.102.0 REQUIRED)
bvanessen marked this conversation as resolved.
Show resolved Hide resolved
add_executable(Main main.cpp)
target_link_libraries(Main PRIVATE LBANN::lbann)
find_package(Conduit CONFIG REQUIRED)
add_executable(lbann-core main.cpp)
bvanessen marked this conversation as resolved.
Show resolved Hide resolved
target_link_libraries(lbann-core PRIVATE LBANN::lbann)

#target_link_libraries(lbann-bin lbann)
set_target_properties(lbann-core
PROPERTIES
OUTPUT_NAME lbann-core-driver
RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

#list(APPEND LBANN_EXE_TGTS lbann-core)

install(TARGETS lbann-core
EXPORT LBANNTargets
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
107 changes: 93 additions & 14 deletions core-driver/main.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,11 @@
#include <mpi.h>
#include <stdio.h>

// Add test-specific options
void construct_opts(int argc, char **argv) {
auto& arg_parser = lbann::global_argument_parser();
lbann::construct_std_options();
lbann::construct_datastore_options();
arg_parser.add_option("samples",
{"-n"},
"Number of samples to run inference on",
Expand All @@ -52,20 +55,76 @@ void construct_opts(int argc, char **argv) {
"Number of labels in dataset",
10);
arg_parser.add_option("minibatchsize",
{"-mbs"},
{"--mbs"},
"Number of samples in a mini-batch",
16);
arg_parser.add_flag("use_conduit",
{"--conduit"},
"Use Conduit node samples (Default is non-distributed matrix)");
arg_parser.add_flag("use_dist_matrix",
{"--dist"},
"Use Hydrogen distributed matrix (Default is non-distributed matrix)");
arg_parser.add_required_argument<std::string>
("model",
"Directory containing checkpointed model");
arg_parser.parse(argc, argv);
}

El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>
random_samples(El::Grid const& g, int n, int c, int h, int w) {
// Generates random samples and labels for mnist data in Hydrogen matrix
std::map<
std::string,
El::Matrix<float, El::Device::CPU>>
mat_mnist_samples(int n, int c, int h, int w)
{
El::Matrix<float, El::Device::CPU>
samples(c * h * w, n);
El::MakeUniform(samples);
El::Matrix<float, El::Device::CPU>
labels(1, n);
El::MakeUniform(labels);
std::map<
std::string,
El::Matrix<float, El::Device::CPU>>
samples_map = {{"data/samples", samples}, {"data/labels", labels}};
return samples_map;
}

// Generates random samples and labels for mnist data in Hydrogen distributed matrix
//
// g is passed to both DistMatrix constructors; n, c, h and w size the
// matrices. The returned map has a "data/samples" and a "data/labels" entry,
// each filled by El::MakeUniform before being copied into the map.
std::map<
std::string,
El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>>
distmat_mnist_samples(El::Grid const& g, int n, int c, int h, int w)
{
// NOTE(review): the two `samples(...)` lines below look like the removed and
// added sides of this diff hunk rendered together; presumably only the
// (c * h * w, n, g) form is in the new file -- confirm against the commit.
El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>
samples(n, c * h * w, g);
samples(c * h * w, n, g);
El::MakeUniform(samples);
// Labels matrix is constructed with dimensions (1, n) on the same grid.
El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>
labels(1, n, g);
El::MakeUniform(labels);
std::map<
std::string,
El::DistMatrix<float, El::STAR, El::STAR, El::ELEMENT, El::Device::CPU>>
samples_map = {{"data/samples", samples}, {"data/labels", labels}};
return samples_map;
}

// Fills arr[0 .. size) with pseudo-random values in the range [0, 1).
//
// Each element is (std::rand() % max_val) / max_val, so results are multiples
// of 1 / max_val and depend on the current std::rand() state.
//
// arr     : destination buffer; must hold at least `size` floats.
// size    : number of elements to write; nothing is written when size <= 0.
// max_val : modulus and divisor; must be positive (modulo by zero is undefined).
void random_fill(float *arr, int size, int max_val=255) {
  const float scale = static_cast<float>(max_val);
  // The index has to start at 0 explicitly: it was previously declared
  // without an initializer, which made the whole loop undefined behavior.
  for (int i = 0; i < size; ++i) {
    arr[i] = static_cast<float>(std::rand() % max_val) / scale;
  }
}

// Generates random samples and labels for mnist data in vector of Conduit nodes
//
// Returns n nodes. In each node, "data/samples" is set from c * h * w floats
// produced by random_fill, and "data/labels" is assigned std::rand() % 10.
//
// n       : number of nodes to create.
// c, h, w : multiplied together to give the number of floats per sample.
std::vector<conduit::Node> conduit_mnist_samples(int n, int c, int h, int w) {
  std::vector<conduit::Node> samples(n);
  const int sample_size = c * h * w;
  // Heap-backed scratch buffer: a variable-length array is not standard C++
  // and would put the whole sample on the stack.
  std::vector<float> this_sample(sample_size);
  // The index starts at 0 explicitly; it was previously left uninitialized,
  // which made the loop undefined behavior.
  for (int i = 0; i < n; ++i) {
    random_fill(this_sample.data(), sample_size);
    // The scratch buffer is reused on every iteration, as before; this relies
    // on Node::set copying the data (see the Conduit Node::set documentation).
    samples[i]["data/samples"].set(this_sample.data(), sample_size);
    samples[i]["data/labels"] = std::rand() % 10;
  }
  return samples;
}

Expand All @@ -79,10 +138,13 @@ int main(int argc, char **argv) {
int rank;
MPI_Comm_rank(MPI_COMM_WORLD, &rank);

// Get input arguments and print values
// Get input arguments, check and print values
construct_opts(argc, argv);
auto& arg_parser = lbann::global_argument_parser();
if (rank == 0) {
if (arg_parser.get<bool>("use_conduit") && arg_parser.get<bool>("use_dist_matrix")) {
LBANN_ERROR("Cannot use conduit node and distributed matrix together, choose one: --conduit --dist");
}
std::stringstream msg;
msg << "Model: " << arg_parser.get<std::string>("model") << std::endl;
msg << "{ N, c, h, w } = { " << arg_parser.get<int>("samples") << ", ";
Expand All @@ -94,8 +156,8 @@ int main(int argc, char **argv) {
std::cout << msg.str();
}

// Load model and run inference on samples
auto lbann_comm = lbann::initialize_lbann(MPI_COMM_WORLD);

auto m = lbann::load_inference_model(lbann_comm.get(),
arg_parser.get<std::string>("model"),
arg_parser.get<int>("minibatchsize"),
Expand All @@ -105,14 +167,31 @@ int main(int argc, char **argv) {
arg_parser.get<int>("width")
},
{arg_parser.get<int>("labels")});
auto samples = random_samples(lbann_comm->get_trainer_grid(),
arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
auto labels = lbann::infer(m.get(),
samples,
arg_parser.get<int>("minibatchsize"));

// three options for data generation
if (arg_parser.get<bool>("use_conduit")) {
auto samples = conduit_mnist_samples(arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
lbann::set_inference_samples(samples);
} else if (arg_parser.get<bool>("use_dist_matrix")) {
auto samples = distmat_mnist_samples(lbann_comm->get_trainer_grid(),
arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
lbann::set_inference_samples(samples);
} else {
auto samples = mat_mnist_samples(
arg_parser.get<int>("samples"),
arg_parser.get<int>("channels"),
arg_parser.get<int>("height"),
arg_parser.get<int>("width"));
lbann::set_inference_samples(samples);
}

auto labels = lbann::inference(m.get());

// Print inference results
if (lbann_comm->am_world_master()) {
Expand Down
10 changes: 10 additions & 0 deletions core-driver/run.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# Environment exported for every driver invocation below.
export AL_PROGRESS_RANKS_PER_NUMA_NODE=2
export OMP_NUM_THREADS=8
export MV2_USE_RDMA_CM=0

# This should be a checkpointed lenet model
MODEL_LOC="path/to/checkpointed/model"

# Driver executable. Defaults to ./Main for backward compatibility; set DRIVER
# in the environment when the build names it differently
# (core-driver/CMakeLists.txt sets OUTPUT_NAME lbann-core-driver under
# <build>/bin).
DRIVER="${DRIVER:-./Main}"

# One run per sample container: the default (non-distributed matrix),
# --dist (Hydrogen distributed matrix) and --conduit (Conduit node samples).
# Arguments are quoted so a model path containing whitespace stays one argument.
"$DRIVER" "$MODEL_LOC"
"$DRIVER" "$MODEL_LOC" --dist
"$DRIVER" "$MODEL_LOC" --conduit
1 change: 1 addition & 0 deletions include/lbann/data_ingestion/readers/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ set_full_path(THIS_DIR_HEADERS
metadata.hpp
# Data readers
data_reader_cifar10.hpp
data_reader_conduit.hpp
data_reader_csv.hpp
data_reader_image.hpp
data_reader_HDF5.hpp
Expand Down
72 changes: 72 additions & 0 deletions include/lbann/data_ingestion/readers/data_reader_conduit.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
////////////////////////////////////////////////////////////////////////////////
// Copyright (c) 2014-2021, Lawrence Livermore National Security, LLC.
// Produced at the Lawrence Livermore National Laboratory.
// Written by the LBANN Research Team (B. Van Essen, et al.) listed in
// the CONTRIBUTORS file. <lbann-dev@llnl.gov>
//
// LLNL-CODE-697807.
// All rights reserved.
//
// This file is part of LBANN: Livermore Big Artificial Neural Network
// Toolkit. For details, see http://software.llnl.gov/LBANN or
// https://github.com/LLNL/LBANN.
//
// Licensed under the Apache License, Version 2.0 (the "Licensee"); you
// may not use this file except in compliance with the License. You may
// obtain a copy of the License at:
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the license.
////////////////////////////////////////////////////////////////////////////////

#ifndef LBANN_DATA_READER_CONDUIT_HPP
#define LBANN_DATA_READER_CONDUIT_HPP

#include "lbann/data_readers/data_reader.hpp"
#include "lbann/data_store/data_store_conduit.hpp"

namespace lbann {
/**
 * A generalized data reader for passed in conduit nodes.
 *
 * Derives from generic_data_reader. The data and label dimensions are kept as
 * vectors of ints, and their products are reported as the linearized sizes.
 */
class conduit_data_reader : public generic_data_reader
{
public:
/**
 * Returns a copy of this reader allocated with `new` via the copy
 * constructor. Ownership of the returned pointer is presumably the caller's
 * -- confirm against generic_data_reader's contract.
 */
conduit_data_reader* copy() const override { return new conduit_data_reader(*this); }
/** Always returns true for this reader. */
bool has_conduit_output() override { return true; }
/** Declared here; the definition is not part of this header. */
void load() override;
/**
 * Declared here; the definition is not part of this header. Presumably fills
 * @p sample with the node for @p data_id -- confirm against the definition.
 */
bool fetch_conduit_node(conduit::Node& sample, int data_id) override;

/**
 * Dimension setters, declared here and defined elsewhere. Presumably they
 * store @p dims in m_data_dims / m_label_dims -- confirm against the
 * definitions.
 */
void set_data_dims(std::vector<int> dims);
void set_label_dims(std::vector<int> dims);

/** Returns the type name string "conduit_data_reader". */
std::string get_type() const override { return "conduit_data_reader"; }
/** Returns the product of all entries of m_data_dims (1 when it is empty). */
int get_linearized_data_size() const override {
int data_size = 1;
for(int i : m_data_dims) {
data_size *= i;
}
return data_size;
}
/** Returns the product of all entries of m_label_dims (1 when it is empty). */
int get_linearized_label_size() const override {
int label_size = 1;
for(int i : m_label_dims) {
label_size *= i;
}
return label_size;
}

protected:
bvanessen marked this conversation as resolved.
Show resolved Hide resolved
/** Dimensions multiplied together by get_linearized_data_size(). */
std::vector<int> m_data_dims;
/** Dimensions multiplied together by get_linearized_label_size(). */
std::vector<int> m_label_dims;

}; // END: class conduit_data_reader

} // namespace lbann

#endif // LBANN_DATA_READER_CONDUIT_HPP
Loading
Loading