Make leela2onnx Conv-nodes compatible with onnx2pytorch (LeelaChessZero#1924)

* Accommodating onnx2pytorch for conv-blocks
* Alternate ONNX layernorm implementation with explicit fp32 casting
* Switch for supporting onnx2pytorch

Co-authored-by: borg323 <[email protected]>
(cherry picked from commit ed4e669)
patrik-ha authored and PikaCat-OuO committed Nov 27, 2023
1 parent 5d3f853 commit bb4682b
Showing 6 changed files with 102 additions and 20 deletions.
6 changes: 6 additions & 0 deletions src/lc0ctl/leela2onnx.cc
@@ -53,6 +53,8 @@ const OptionId kOutputValue{
     "ONNX name to use for value policy head output node."};
 const OptionId kOutputMlh{"mlh-head-name", "MlhHeadName",
                           "ONNX name to use for the MLH head output node."};
+const OptionId kOnnxToPytorch{"onnx2pytorch", "Onnx2Pytorch",
+                              "Only use layer definitions supported by onnx2pytorch."};
 
 bool ProcessParameters(OptionsParser* options) {
   options->Add<StringOption>(kInputFilenameId);
@@ -63,6 +65,7 @@ bool ProcessParameters(OptionsParser* options) {
   options->Add<StringOption>(kOutputWdl) = "/output/wdl";
   options->Add<StringOption>(kOutputValue) = "/output/value";
   options->Add<StringOption>(kOutputMlh) = "/output/mlh";
+  options->Add<BoolOption>(kOnnxToPytorch) = false;
   if (!options->ProcessAllFlags()) return false;
 
   const OptionsDict& dict = options->GetOptionsDict();
@@ -94,6 +97,9 @@ void ConvertLeelaToOnnx() {
     onnx_options.output_wdl = dict.Get<std::string>(kOutputWdl);
     onnx_options.output_value = dict.Get<std::string>(kOutputValue);
     onnx_options.output_mlh = dict.Get<std::string>(kOutputMlh);
+    // onnx2pytorch only needs an alternate layernorm implementation, so this
+    // currently only enables that. Might need to be extended in the future.
+    onnx_options.alternative_layer_normalization = dict.Get<bool>(kOnnxToPytorch);
     weights_file = ConvertWeightsToOnnx(weights_file, onnx_options);
   }

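The new switch exists so that the exported model loads cleanly in the onnx2pytorch Python package. Here is a minimal round-trip sketch, assuming the flag spellings and file names shown (they are illustrative, not taken from lc0's docs):

```python
# Export with the new switch first (flag names assumed; see `./lc0 leela2onnx --help`):
#   ./lc0 leela2onnx --input=weights.pb.gz --output=model.onnx --onnx2pytorch=true
import onnx
import torch
from onnx2pytorch import ConvertModel

onnx_model = onnx.load("model.onnx")      # the network exported above
pytorch_model = ConvertModel(onnx_model)  # build an equivalent torch.nn.Module

# Smoke test: lc0 networks take 112 input planes of 8x8, i.e. (batch, 112, 8, 8).
dummy = torch.zeros(1, 112, 8, 8)
with torch.no_grad():
    outputs = pytorch_model(dummy)  # policy / wdl-or-value (/ mlh) head outputs
```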
38 changes: 38 additions & 0 deletions src/neural/onnx/builder.cc
@@ -137,10 +137,12 @@ std::string OnnxBuilder::Conv(const std::string& name,
                               const OnnxConst& kernel_weights,
                               const OnnxConst& bias_weights, int pads) {
   auto* node = model_.mutable_graph()->add_node();
+  auto shape = kernel_weights.GetDimensions().back();
   auto out = PopulateStdNodeFields(node, name, input_name, "Conv");
   node->add_input(AddInitializer(name + "/w/kernel", kernel_weights));
   node->add_input(AddInitializer(name + "/w/bias", bias_weights));
   AddIntsAttribute(node, "pads", {pads, pads, pads, pads});
+  AddIntsAttribute(node, "kernel_shape", {shape, shape});
   return out;
 }

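A note on the `kernel_shape` addition above: the attribute is optional in the ONNX Conv spec, where it defaults to being inferred from the weight tensor, so the exporter could previously omit it. onnx2pytorch evidently relies on it being present, hence it is now set explicitly from the trailing dimension of the kernel weights.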
@@ -438,4 +440,40 @@ std::string OnnxBuilder::Mish(const std::string& name,
   return PopulateStdNodeFields(node, name, input, "Mish");
 }
 
+std::string OnnxBuilder::Sqrt(const std::string& name,
+                              const std::string& input) {
+  auto* node = model_.mutable_graph()->add_node();
+  return PopulateStdNodeFields(node, name, input, "Sqrt");
+}
+
+std::string OnnxBuilder::Reciprocal(const std::string& name,
+                                    const std::string& input) {
+  auto* node = model_.mutable_graph()->add_node();
+  return PopulateStdNodeFields(node, name, input, "Reciprocal");
+}
+
+std::string OnnxBuilder::Cast(const std::string& name, const std::string& input,
+                              pblczero::TensorProto::DataType type) {
+  auto* node = model_.mutable_graph()->add_node();
+  auto out = PopulateStdNodeFields(node, name, input, "Cast");
+  AddIntAttribute(node, "to", type);
+  return out;
+}
+
+std::string OnnxBuilder::ReduceMean(const std::string& name,
+                                    const std::string& input,
+                                    std::initializer_list<int> axes) {
+  auto* node = model_.mutable_graph()->add_node();
+  auto out = PopulateStdNodeFields(node, name, input, "ReduceMean");
+  if (opset_ < 18) {
+    AddIntsAttribute(node, "axes", axes);
+  } else {
+    node->add_input(AddInitializer(
+        name + "/axes",
+        Int64OnnxConst(std::vector<int64_t>(begin(axes), end(axes)),
+                       {static_cast<int>(axes.size())})));
+  }
+  return out;
+}
+
 }  // namespace lczero
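The opset branch in the new `ReduceMean` helper tracks a change in the ONNX operator set: through opset 17 the reduction axes are supplied as an attribute, while from opset 18 onward they are passed as a second input tensor, which is why the builder emits a separate `axes` initializer in that case.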
8 changes: 7 additions & 1 deletion src/neural/onnx/builder.h
@@ -65,7 +65,7 @@ class OnnxBuilder {
   std::string Add(const std::string& name, const std::string& input1,
                   const std::string& input2);
   std::string Add(const std::string& name, const std::string& input1,
-                  const OnnxConst&);
+                  const OnnxConst& input2);
   std::string GlobalAveragePool(const std::string& name,
                                 const std::string& input);
   std::string Squeeze(const std::string& name, const std::string& input,
@@ -120,6 +120,12 @@ class OnnxBuilder {
   std::string Where(const std::string& name, const std::string& input1,
                     const std::string& input2, const std::string& input3);
   std::string Mish(const std::string& name, const std::string& input);
+  std::string Sqrt(const std::string& name, const std::string& input);
+  std::string Reciprocal(const std::string& name, const std::string& input);
+  std::string Cast(const std::string& name, const std::string& input,
+                   pblczero::TensorProto::DataType type);
+  std::string ReduceMean(const std::string& name, const std::string& input,
+                         std::initializer_list<int> axes);
   // Returns ONNX model as protobuf.
   const pblczero::ModelProto& as_proto() const { return model_; }
   // Returns serialized model.
Expand Down
67 changes: 48 additions & 19 deletions src/neural/onnx/converter.cc
@@ -111,6 +111,11 @@ class Converter {
                           const std::string& encoder_in,
                           const std::string& name);
 
+  std::string MakeLayerNorm(OnnxBuilder* builder, const std::string& input,
+                            const std::string& name,
+                            const lczero::OnnxConst& gammas,
+                            const lczero::OnnxConst& betas, float eps = 1e-6);
+
   std::string MakeEncoderLayer(OnnxBuilder* builder,
                                const LegacyWeights::EncoderLayer& layer,
                                int embedding_size, int heads,
@@ -320,10 +325,10 @@ std::string Converter::MakeSmolgen(OnnxBuilder* builder,
       name + "/smolgen/dense1/b", flow,
       *GetWeghtsConverter(layer.mha.smolgen.dense1_b, {smolgen_hidden_sz}));
   flow = MakeActivation(builder, flow, name + "/smolgen/dense1", activation);
-  flow = builder->LayerNormalization(
-      name + "/smolgen/ln1", flow,
+  flow = MakeLayerNorm(
+      builder, flow, name + "/smolgen/ln1",
       *GetWeghtsConverter(layer.mha.smolgen.ln1_gammas, {smolgen_hidden_sz}),
-      *GetWeghtsConverter(layer.mha.smolgen.ln1_betas, {smolgen_hidden_sz}), 1,
+      *GetWeghtsConverter(layer.mha.smolgen.ln1_betas, {smolgen_hidden_sz}),
       1e-3);
   flow = builder->MatMul(
       name + "/smolgen/dense2/w", flow,
@@ -333,13 +338,12 @@ std::string Converter::MakeSmolgen(OnnxBuilder* builder,
                       *GetWeghtsConverter(layer.mha.smolgen.dense2_b,
                                           {smolgen_gen_sz * heads}));
   flow = MakeActivation(builder, flow, name + "/smolgen/dense2", activation);
-  flow = builder->LayerNormalization(
-      name + "/smolgen/ln2", flow,
-      *GetWeghtsConverter(layer.mha.smolgen.ln2_gammas,
-                          {smolgen_gen_sz * heads}),
-      *GetWeghtsConverter(layer.mha.smolgen.ln2_betas,
-                          {smolgen_gen_sz * heads}),
-      1, 1e-3);
+  flow = MakeLayerNorm(builder, flow, name + "/smolgen/ln2",
+                       *GetWeghtsConverter(layer.mha.smolgen.ln2_gammas,
+                                           {smolgen_gen_sz * heads}),
+                       *GetWeghtsConverter(layer.mha.smolgen.ln2_betas,
+                                           {smolgen_gen_sz * heads}),
+                       1e-3);
   flow =
       builder->Reshape(name + "/smolgen/gen_from/reshape", flow,
                        builder->AddInitializer(
@@ -354,6 +358,33 @@ std::string Converter::MakeSmolgen(OnnxBuilder* builder,
   return flow;
 }
 
+std::string Converter::MakeLayerNorm(OnnxBuilder* builder,
+                                     const std::string& input,
+                                     const std::string& name,
+                                     const lczero::OnnxConst& gammas,
+                                     const lczero::OnnxConst& betas,
+                                     float eps) {
+  if (!options_.alternative_layer_normalization) {
+    return builder->LayerNormalization(name, input, gammas, betas, 1, eps);
+  }
+  auto in =
+      builder->Cast(name + "/to_float", input, pblczero::TensorProto::FLOAT);
+  auto flow = builder->ReduceMean(name + "/mean", in, {1});
+  in = builder->Sub(name + "/centered", in, flow);
+  flow = builder->Mul(name + "/squared", in, in);
+  flow = builder->ReduceMean(name + "/var", flow, {1});
+  flow =
+      builder->Add(name + "/var_eps", flow,
+                   static_cast<const OnnxConst&>(FloatOnnxConst({eps}, {1})));
+  flow = builder->Sqrt(name + "/std", flow);
+  flow = builder->Reciprocal(name + "/inv_std", flow);
+  flow = builder->Mul(name + "/normalized", in, flow);
+  flow = builder->Cast(name + "/to_data_type", flow, GetDataType());
+  flow = builder->Mul(name + "/gammas", flow, gammas);
+  flow = builder->Add(name + "/betas", flow, betas);
+  return flow;
+}
+
 std::string Converter::MakeEncoderLayer(
     OnnxBuilder* builder, const LegacyWeights::EncoderLayer& layer,
     int embedding_size, int heads, const std::string& encoder_in,
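For reference, the op chain in `MakeLayerNorm` above computes plain layer normalization over axis 1, with the mean and variance taken in fp32 before casting back to the model's data type:

$$\mathrm{LN}(x) = \gamma \odot \frac{x - \mu}{\sqrt{\sigma^{2} + \epsilon}} + \beta, \qquad \mu = \frac{1}{n}\sum_{i} x_{i}, \qquad \sigma^{2} = \frac{1}{n}\sum_{i} (x_{i} - \mu)^{2}$$

`Reciprocal(Sqrt(...))` is just $1/\sqrt{\cdot}$ spelled with the two new builder primitives. Note that the smolgen call sites pass an epsilon of 1e-3, while the encoder layer norms below fall back to the declaration's default of 1e-6.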
@@ -430,11 +461,10 @@ std::string Converter::MakeEncoderLayer(
     alpha_in = encoder_in;
   }
   flow = builder->Add(name + "/mha/out/skip", flow, alpha_in);
-
-  auto ffn_in = builder->LayerNormalization(
-      name + "/ln1", flow,
-      *GetWeghtsConverter(layer.ln1_gammas, {embedding_size}),
-      *GetWeghtsConverter(layer.ln1_betas, {embedding_size}), 1);
+  auto ffn_in =
+      MakeLayerNorm(builder, flow, name + "/ln1",
+                    *GetWeghtsConverter(layer.ln1_gammas, {embedding_size}),
+                    *GetWeghtsConverter(layer.ln1_betas, {embedding_size}));
   const int dff_size = layer.ffn.dense1_b.size();
   flow =
       builder->MatMul(name + "/ffn/dense1/w", ffn_in,
@@ -462,10 +492,9 @@ std::string Converter::MakeEncoderLayer(
     alpha_ffn_in = ffn_in;
   }
   flow = builder->Add(name + "/ffn/skip", flow, alpha_ffn_in);
-  flow = builder->LayerNormalization(
-      name + "/ln2", flow,
-      *GetWeghtsConverter(layer.ln2_gammas, {embedding_size}),
-      *GetWeghtsConverter(layer.ln2_betas, {embedding_size}), 1);
+  flow = MakeLayerNorm(builder, flow, name + "/ln2",
+                       *GetWeghtsConverter(layer.ln2_gammas, {embedding_size}),
+                       *GetWeghtsConverter(layer.ln2_betas, {embedding_size}));
   return flow;
 }

1 change: 1 addition & 0 deletions src/neural/onnx/converter.h
@@ -44,6 +44,7 @@ struct WeightsToOnnxConverterOptions {
   int batch_size = -1;
   int opset = 17;
   bool alt_mish = false;
+  bool alternative_layer_normalization = false;
 };
 
 // Converts "classical" weights file to weights file with embedded ONNX model.
2 changes: 2 additions & 0 deletions src/neural/onnx/network_onnx.cc
@@ -455,6 +455,8 @@ std::unique_ptr<Network> MakeOnnxNetwork(const std::optional<WeightsFile>& w,
   converter_options.opset = opts.GetOrDefault<int>("opset", 17);
   converter_options.alt_mish = opts.GetOrDefault<bool>(
       "alt_mish", kProvider == OnnxProvider::CPU ? true : false);
+  converter_options.alternative_layer_normalization =
+      opts.GetOrDefault<bool>("alternative_layer_normalization", true);
   converter_options.data_type_ =
       fp16 ? WeightsToOnnxConverterOptions::DataType::kFloat16
            : WeightsToOnnxConverterOptions::DataType::kFloat32;
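Worth noting: the ONNX backend enables the alternative layer normalization by default (`GetOrDefault(..., true)`), while the leela2onnx CLI defaults it to off. Per the commit notes, the hand-rolled form casts to fp32 around the mean/variance computation, which keeps the statistics accurate when the rest of the graph runs in fp16. Since the option is read from the backend options dictionary, it can presumably be disabled with lc0's usual key=value backend-option syntax, e.g. `--backend-opts=alternative_layer_normalization=false`.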
