Skip to content

Commit

Permalink
#6344: Port RoBERTa model to n300
Browse files Browse the repository at this point in the history
  • Loading branch information
kkeerthana0573 committed Nov 21, 2024
1 parent 019a5cc commit 8df21f2
Show file tree
Hide file tree
Showing 10 changed files with 258 additions and 80 deletions.
2 changes: 1 addition & 1 deletion models/demos/bert/tt/ttnn_optimized_bert.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def bert_attention(
attention_scores = ttnn.matmul(
query,
key,
memory_config=ttnn.L1_MEMORY_CONFIG,
memory_config=ttnn.DRAM_MEMORY_CONFIG if getattr(config, "use_dram", False) else ttnn.L1_MEMORY_CONFIG,
dtype=ttnn.bfloat16,
core_grid=ttnn.CoreGrid(y=batch_size, x=num_cores_x),
)
Expand Down
19 changes: 19 additions & 0 deletions models/demos/roberta/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
## functional_roberta Demo
## How to Run

If you wish to run the demo for ttnn_optimized_functional_roberta, use `pytest --disable-warnings models/demos/roberta/demo/demo.py::test_demo[models.demos.bert.tt.ttnn_optimized_bert-8-384-deepset/roberta-large-squad2-models/demos/roberta/demo/input_data.json]` to run the demo.

If you wish to run the demo with a different input use `pytest --disable-warnings models/demos/roberta/demo/demo.py::test_demo[models.demos.bert.tt.ttnn_optimized_bert-8-384-deepset/roberta-large-squad2-<address_to_your_json_file>]`. This file is expected to have exactly 8 inputs.

Our second demo is designed to run the SQuADv2 dataset. Run it with `pytest --disable-warnings models/demos/roberta/demo/demo.py::test_demo_squadv2[models.demos.bert.tt.ttnn_optimized_bert-8-384-3-deepset/roberta-large-squad2]`.

If you wish to run for `n_iterations` samples, use `pytest --disable-warnings models/demos/roberta/demo/demo.py::test_demo_squadv2[models.demos.bert.tt.ttnn_optimized_bert-8-384-<n_iterations>-deepset/roberta-large-squad2]`


# Inputs
Inputs are provided from `input_data.json` by default. If you wish to change the inputs, provide a different path to test_demo.

We do not recommend modifying the `input_data.json` file.

# Details
The entry point to the functional_roberta model is `bert_for_question_answering` in `models/demos/bert/tt/ttnn_bert.py` (`models/demos/bert/tt/ttnn_optimized_bert.py` for the optimized version). The model picks up certain configs and weights from the huggingface pretrained model. We have used the `deepset/roberta-large-squad2` version from huggingface as our reference.
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,6 @@
disable_persistent_kernel_cache,
profiler,
)
from models.demos.bert.tt import ttnn_bert
from models.demos.bert.tt import ttnn_optimized_bert

from models.datasets.dataset_squadv2 import squadv2_1K_samples_input, squadv2_answer_decode_batch
Expand Down Expand Up @@ -42,6 +41,12 @@ def load_inputs(input_path, batch):
return context, question


def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
    """Build position ids for ``input_ids``, leaving padding slots at ``padding_idx``.

    Every token different from ``padding_idx`` receives
    ``padding_idx + past_key_values_length + k``, where ``k`` is its 1-based
    rank among the non-padding tokens of its row (counted along dim 1).
    Padding tokens receive ``padding_idx`` itself.

    Args:
        input_ids: integer tensor with at least two dimensions; the running
            count is taken along dimension 1.
        padding_idx: token id that marks padding.
        past_key_values_length: offset added to every non-padding position.

    Returns:
        A ``torch.long`` tensor with the same shape as ``input_ids``.
    """
    # 1 where a real token is present, 0 on padding.
    not_padding = input_ids.ne(padding_idx).int()
    # Running count of real tokens per row, cast back to the mask dtype.
    running_count = torch.cumsum(not_padding, dim=1).type_as(not_padding)
    # Multiplying by the mask zeroes the padding slots again after the offset.
    offsets = (running_count + past_key_values_length) * not_padding
    return offsets.long() + padding_idx


def run_roberta_question_and_answering_inference(
device,
use_program_cache,
Expand All @@ -60,13 +65,9 @@ def run_roberta_question_and_answering_inference(
tokenizer = RobertaTokenizer.from_pretrained(model_name)
config = hugging_face_reference_model.config
nlp = pipeline("question-answering", model=hugging_face_reference_model, tokenizer=tokenizer)
config.use_dram = True

if bert == ttnn_bert:
tt_model_name = f"ttnn_{model_name}"
elif bert == ttnn_optimized_bert:
tt_model_name = f"ttnn_{model_name}_optimized"
else:
raise ValueError(f"Unknown bert: {bert}")
tt_model_name = f"ttnn_{model_name}_optimized"

profiler.start(f"preprocessing_parameter")
parameters = preprocess_model_parameters(
Expand Down Expand Up @@ -105,10 +106,14 @@ def run_roberta_question_and_answering_inference(

profiler.start(f"preprocessing_input")

position_ids = create_position_ids_from_input_ids(
input_ids=roberta_input.input_ids, padding_idx=config.pad_token_id
)
ttnn_roberta_inputs = bert.preprocess_inputs(
roberta_input["input_ids"],
roberta_input["token_type_ids"],
torch.zeros(1, sequence_size) if bert == ttnn_optimized_bert else None,
position_ids,
roberta_input["attention_mask"],
device=device,
)
profiler.end(f"preprocessing_input")
Expand Down Expand Up @@ -139,7 +144,8 @@ def run_roberta_question_and_answering_inference(

tt_answer = nlp.postprocess([tt_res], **postprocess_params)

logger.info(f"answer: {tt_answer['answer']}\n")
logger.info(f"Question: {question[i]}")
logger.info(f"Answer: {tt_answer['answer']}\n")
model_answers[i] = tt_answer["answer"]

profiler.end("post_processing_output_to_string")
Expand Down Expand Up @@ -167,6 +173,9 @@ def run_roberta_question_and_answering_inference_squad_v2(
bert,
n_iterations,
):
expected_exact_match = 83.33333333333333
expected_f1_score = 91.66666666666667

disable_persistent_kernel_cache()

hugging_face_reference_model = RobertaForQuestionAnswering.from_pretrained(model_name)
Expand All @@ -175,13 +184,9 @@ def run_roberta_question_and_answering_inference_squad_v2(
# set up tokenizer
tokenizer = RobertaTokenizer.from_pretrained(model_name)
config = hugging_face_reference_model.config
config.use_dram = True

if bert == ttnn_bert:
tt_model_name = f"ttnn_{model_name}"
elif bert == ttnn_optimized_bert:
tt_model_name = f"ttnn_{model_name}_optimized"
else:
raise ValueError(f"Unknown bert: {bert}")
tt_model_name = f"ttnn_{model_name}_optimized"

parameters = preprocess_model_parameters(
model_name=tt_model_name,
Expand All @@ -208,10 +213,14 @@ def run_roberta_question_and_answering_inference_squad_v2(
if i < n_iterations:
batch_data = batch[0]
curr_batch_size = batch_data["input_ids"].shape[0]
position_ids = create_position_ids_from_input_ids(
input_ids=batch_data.input_ids, padding_idx=config.pad_token_id
)
ttnn_roberta_inputs = bert.preprocess_inputs(
batch_data["input_ids"],
batch_data["token_type_ids"],
torch.zeros(1, sequence_size) if bert == ttnn_optimized_bert else None,
position_ids,
batch_data["attention_mask"],
device=device,
)

Expand Down Expand Up @@ -250,44 +259,50 @@ def run_roberta_question_and_answering_inference_squad_v2(
i += 1
eval_score = squad_metric.compute(predictions=pred_labels, references=true_labels)
cpu_eval_score = squad_metric.compute(predictions=cpu_pred_labels, references=true_labels)
logger.info(f"\tTT_Eval: exact: {eval_score['exact']} -- F1: {eval_score['f1']}")
logger.info(f"TT_Eval: exact: {eval_score['exact']} -- F1: {eval_score['f1']}")

# The failure message must only reference names that exist: the expected exact-match
# value is stored in `expected_exact_match`, so using any other name here would turn a
# failed check into a NameError instead of an AssertionError.
# NOTE(review): the condition passes when EITHER metric matches its expected value;
# confirm whether both metrics were meant to be required (`and`).
assert eval_score["exact"] == expected_exact_match or eval_score["f1"] == expected_f1_score, (
f"Expected Exact Match: {expected_exact_match}, Actual Exact Match: {eval_score['exact']}; "
f"Expected F1 Score: {expected_f1_score}, Actual F1 Score: {eval_score['f1']}"
)

@pytest.mark.parametrize("model_name", ["deepset/roberta-large-squad2"])
@pytest.mark.parametrize("bert", [ttnn_bert, ttnn_optimized_bert])
def test_demo(
input_path,
model_name,
bert,
device,
use_program_cache,
):

@pytest.mark.parametrize(
"model_name, input_loc",
((["deepset/roberta-large-squad2", "models/demos/roberta/demo/input_data.json"]),),
)
@pytest.mark.parametrize(
("bert", "batch_size", "sequence_size"),
((ttnn_optimized_bert, 8, 384),),
)
def test_demo(device, use_program_cache, model_name, input_loc, bert, batch_size, sequence_size):
disable_persistent_kernel_cache()
disable_compilation_reports()

return run_roberta_question_and_answering_inference(
device=device,
use_program_cache=use_program_cache,
model_name=model_name,
batch_size=8,
sequence_size=384,
batch_size=batch_size,
sequence_size=sequence_size,
bert=bert,
input_path=input_path,
input_path=input_loc,
)


@pytest.mark.parametrize("model_name", ["deepset/roberta-large-squad2"])
@pytest.mark.parametrize("bert", [ttnn_bert, ttnn_optimized_bert])
@pytest.mark.parametrize(
"n_iterations",
((3),),
("bert", "batch_size", "sequence_size", "n_iterations"),
((ttnn_optimized_bert, 8, 384, 3),),
)
def test_demo_squadv2(
device,
use_program_cache,
model_name,
bert,
batch_size,
sequence_size,
n_iterations,
device,
use_program_cache,
):
disable_persistent_kernel_cache()
disable_compilation_reports()
Expand All @@ -296,8 +311,8 @@ def test_demo_squadv2(
device=device,
use_program_cache=use_program_cache,
model_name=model_name,
batch_size=8,
sequence_size=384,
batch_size=batch_size,
sequence_size=sequence_size,
bert=bert,
n_iterations=n_iterations,
)
File renamed without changes.
37 changes: 37 additions & 0 deletions models/demos/roberta/tests/test_perf_device_roberta.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
# SPDX-FileCopyrightText: © 2023 Tenstorrent Inc.

# SPDX-License-Identifier: Apache-2.0

import pytest
from models.utility_functions import is_grayskull
from models.perf.device_perf_utils import run_device_perf, check_device_perf, prep_device_perf_report


@pytest.mark.models_device_performance_bare_metal
@pytest.mark.parametrize(
    "batch_size, test",
    [
        [8, "sequence_size=384-batch_size=8-model_name=deepset/roberta-large-squad2"],
    ],
)
def test_perf_device_bare_metal(batch_size, test):
    """Run the RoBERTa integration test under the device profiler and check throughput.

    Args:
        batch_size: batch size forwarded to the perf helpers and the report.
        test: pytest parameter id selecting the integration test case to profile.
    """
    subdir = "ttnn_roberta"
    num_iterations = 1
    margin = 0.03
    # is_grayskull is a function and must be called: the bare function object is
    # always truthy, which would select the Grayskull number on every architecture.
    expected_perf = 121.93 if is_grayskull() else 122.7

    command = f"pytest tests/ttnn/integration_tests/roberta/test_ttnn_optimized_roberta.py::test_roberta_for_question_answering[{test}]"
    cols = ["DEVICE FW", "DEVICE KERNEL", "DEVICE BRISC KERNEL"]

    inference_time_key = "AVG DEVICE KERNEL SAMPLES/S"
    expected_perf_cols = {inference_time_key: expected_perf}

    post_processed_results = run_device_perf(command, subdir, num_iterations, cols, batch_size)
    expected_results = check_device_perf(post_processed_results, margin, expected_perf_cols)
    prep_device_perf_report(
        model_name=f"ttnn_roberta_{batch_size}",
        batch_size=batch_size,
        post_processed_results=post_processed_results,
        expected_results=expected_results,
        # "/" in the model name is replaced so the comment is a single path-safe token.
        comments=test.replace("/", "_"),
    )
Original file line number Diff line number Diff line change
Expand Up @@ -2,25 +2,19 @@

# SPDX-License-Identifier: Apache-2.0

import time

import pytest

from loguru import logger
import ttnn
import time
import torch
import pytest
import transformers


import ttnn

from models.demos.bert.tt import ttnn_bert
from loguru import logger
from models.demos.bert.tt import ttnn_optimized_bert

from ttnn.model_preprocessing import preprocess_model_parameters

from models.utility_functions import (
is_wormhole_b0,
is_blackhole,
is_grayskull,
enable_persistent_kernel_cache,
disable_persistent_kernel_cache,
)
Expand All @@ -29,34 +23,34 @@

def get_expected_times(bert):
    """Return the expected timing tuple for the given model implementation module.

    Args:
        bert: the model implementation module used as the lookup key.

    Returns:
        A two-element tuple of expected times — presumably
        (compile time, inference time); confirm against the caller.

    Raises:
        KeyError: if ``bert`` is not one of the known implementations.
    """
    return {
        ttnn_bert: (13, 32),
        # is_grayskull is a function and must be called: the bare function object is
        # always truthy, which would pick the Grayskull numbers on every architecture.
        ttnn_optimized_bert: (8.7, 0.15) if is_grayskull() else (12.5, 0.14),
    }[bert]


@pytest.mark.skipif(is_wormhole_b0() or is_blackhole(), reason="Unsupported on WH and BH")
def create_position_ids_from_input_ids(input_ids, padding_idx, past_key_values_length=0):
    """Derive position ids from ``input_ids`` while keeping padding at ``padding_idx``.

    Non-padding tokens are numbered along dimension 1 starting from
    ``padding_idx + past_key_values_length + 1``; padding tokens map to
    ``padding_idx``.

    Args:
        input_ids: integer tensor with at least two dimensions.
        padding_idx: token id that marks padding.
        past_key_values_length: offset added to every non-padding position.

    Returns:
        A ``torch.long`` tensor shaped like ``input_ids``.
    """
    # Integer mask: 1 for real tokens, 0 for padding.
    keep = input_ids.ne(padding_idx).int()
    # Per-row running count of real tokens, in the mask's dtype.
    counted = torch.cumsum(keep, dim=1).type_as(keep)
    # Apply the offset, then re-zero the padding slots with the mask.
    positions = (counted + past_key_values_length) * keep
    return positions.long() + padding_idx


@pytest.mark.models_performance_bare_metal
@pytest.mark.models_performance_virtual_machine
@pytest.mark.parametrize("model_name", ["deepset/roberta-large-squad2"])
@pytest.mark.parametrize("batch_size", [8])
@pytest.mark.parametrize("sequence_size", [384])
@pytest.mark.parametrize("bert", [ttnn_bert, ttnn_optimized_bert])
@pytest.mark.parametrize("bert", [ttnn_optimized_bert])
def test_performance(device, use_program_cache, model_name, batch_size, sequence_size, bert):
disable_persistent_kernel_cache()

config = transformers.RobertaConfig.from_pretrained(model_name)
config.use_dram = True

input_ids = torch.randint(0, config.vocab_size, (batch_size, sequence_size)).to(torch.int32)
torch_token_type_ids = torch.zeros((batch_size, sequence_size), dtype=torch.int32)
torch_position_ids = torch.zeros((batch_size, sequence_size), dtype=torch.int32)
torch_attention_mask = torch.zeros(1, sequence_size) if bert == ttnn_optimized_bert else None
torch_position_ids = create_position_ids_from_input_ids(input_ids=input_ids, padding_idx=config.pad_token_id)

if bert == ttnn_bert:
tt_model_name = f"ttnn_{model_name}"
elif bert == ttnn_optimized_bert:
tt_model_name = f"ttnn_{model_name}_optimized"
else:
raise ValueError(f"Unknown functional_roberta: {bert}")
tt_model_name = f"ttnn_{model_name}_optimized"

parameters = preprocess_model_parameters(
model_name=tt_model_name,
Expand Down Expand Up @@ -106,3 +100,7 @@ def test_performance(device, use_program_cache, model_name, batch_size, sequence
logger.info(f"Compile time: {inference_and_compile_time - inference_time}")
logger.info(f"Inference time: {inference_time}")
logger.info(f"Samples per second: {1 / inference_time * batch_size}")

assert (
inference_time < expected_inference_time
), f"Expected inference time: {expected_inference_time} Actual inference time: {inference_time}"
21 changes: 0 additions & 21 deletions models/experimental/functional_roberta/README.md

This file was deleted.

4 changes: 4 additions & 0 deletions tests/scripts/run_performance.sh
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ run_perf_models_other() {

env pytest -n auto models/demos/convnet_mnist/tests -m $test_marker

env pytest -n auto models/demos/roberta/tests/test_performance.py -m $test_marker

## Merge all the generated reports
env python models/perf/merge_perf_results.py
}
Expand Down Expand Up @@ -96,6 +98,8 @@ run_device_perf_models() {

env pytest models/demos/convnet_mnist/tests/ -m $test_marker

env pytest models/demos/roberta/tests/ -m $test_marker

if [ "$tt_arch" == "grayskull" ]; then
#TODO(MO): Until #6560 is fixed, GS device profiler test are grouped with
#Model Device perf regression tests to make sure they run on no-soft-reset BMs
Expand Down
2 changes: 2 additions & 0 deletions tests/scripts/single_card/run_single_card_demo_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ run_common_func_tests() {
# ConvNet Mnist
pytest --disable-warnings models/demos/convnet_mnist/demo/demo.py --timeout 600; fail+=$?

#RoBERTa
pytest --disable-warnings models/demos/roberta/demo/demo.py --timeout 600; fail+=$?
return $fail
}

Expand Down
Loading

0 comments on commit 8df21f2

Please sign in to comment.