Merge branch 'main' into release/v0.4.0
dakinggg authored Nov 18, 2023
2 parents 3cf55ed + 269ded6 commit bf77fee
Showing 1 changed file with 75 additions and 112 deletions.
187 changes: 75 additions & 112 deletions tests/test_model.py
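The diff below applies one pattern throughout the file: the explicit 'device' test parameter and the hand-written torch.cuda.is_available() skips are dropped in favor of pytest.param(..., marks=pytest.mark.gpu) and get_device(None), which resolves to the GPU when one is present. The following is a minimal before/after sketch of that pattern, not code from the commit; the test names are hypothetical, and it assumes the gpu marker and composer.utils.get_device are available as they are in this repository.

import pytest
import torch
from composer.utils import get_device


# Before: the GPU case carries a 'device' argument plus a manual CUDA skip.
@pytest.mark.parametrize('attn_impl,device', [('torch', 'cpu'), ('flash', 'gpu')])
def test_example_old(attn_impl: str, device: str):
    if not torch.cuda.is_available() and device == 'gpu':
        pytest.skip(f'This test requires CUDA to run with {attn_impl} attention.')
    composer_device = get_device(device)
    ...


# After: GPU-only cases are tagged with the gpu marker and the device is inferred.
@pytest.mark.parametrize('attn_impl', [
    'torch',
    pytest.param('flash', marks=pytest.mark.gpu),
])
def test_example_new(attn_impl: str):
    composer_device = get_device(None)  # GPU if one is available, otherwise CPU
    ...

The marker moves the skip decision out of the test body and into collection, so a CPU-only environment can simply deselect gpu-marked cases.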
@@ -304,17 +304,13 @@ def test_full_forward_and_backward_t5_small(batch_size: int = 2):
assert not torch.equal(original_params, updated_params)


@pytest.mark.gpu
@pytest.mark.parametrize(
'attn_impl,precision',
[('torch', torch.float16), ('torch', torch.bfloat16),
pytest.param('flash', torch.float16, marks=pytest.mark.gpu),
pytest.param('flash', torch.bfloat16, marks=pytest.mark.gpu)])
def test_determinism(attn_impl: str, precision: torch.dtype):
if not torch.cuda.is_available():
pytest.skip(
'This test requires CUDA to be available in order to run with bfloat16 precision.'
)

conf_path = 'scripts/train/yamls/pretrain/testing.yaml'
with open(conf_path) as f:
test_cfg = om.load(f)
@@ -519,10 +515,12 @@ def test_mpt_creation(norm_type: str, no_bias: bool, tie_word_embeddings: bool):
assert block.resid_ffn_dropout.p == 0.2


@pytest.mark.parametrize('attention_impl,device', [('torch', 'cpu'),
('flash', 'gpu'),
('triton', 'gpu'),
('torch', 'gpu')])
@pytest.mark.parametrize('attention_impl', [
'torch',
pytest.param('flash', marks=pytest.mark.gpu),
pytest.param('triton', marks=pytest.mark.gpu),
pytest.param('torch', marks=pytest.mark.gpu)
])
@pytest.mark.parametrize('pos_emb_config', [{
'alibi': False,
'rope': False
@@ -550,24 +548,20 @@ def test_mpt_creation(norm_type: str, no_bias: bool, tie_word_embeddings: bool):
},
}])
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_forward_with_padding(attention_impl: str, device: str,
pos_emb_config: dict, tie_word_embeddings: bool):
def test_forward_with_padding(attention_impl: str, pos_emb_config: dict,
tie_word_embeddings: bool):
# Test that different placement of padding does not affect the output.
if not torch.cuda.is_available() and device == 'gpu':
pytest.skip(
f'This test requires CUDA to be available in order to run with {attention_impl} attention.'
)
alibi = pos_emb_config['alibi']
if alibi and attention_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')

rope = pos_emb_config['rope']
if rope and pos_emb_config['rope_impl'] == 'dail' and (
device != 'gpu' or not is_flash_v2_installed()):
if rope and pos_emb_config[
'rope_impl'] == 'dail' and not is_flash_v2_installed():
pytest.skip(
f'dail implementation of rope requires gpu and flash attention 2.')

composer_device = get_device(device)
composer_device = get_device(None)

hf_config = MPTConfig(
init_device='cpu',
@@ -743,12 +737,12 @@ def test_advanced_mask_building(attention_impl: str):
assert torch.equal(attn_bias, expected_attn_bias)


@pytest.mark.parametrize('attention_impl,device,precision', [
('torch', 'cpu', 'fp32'),
('flash', 'gpu', 'amp_bf16'),
('triton', 'gpu', 'amp_bf16'),
('torch', 'gpu', 'amp_bf16'),
('torch', 'gpu', 'fp32'),
@pytest.mark.parametrize('attention_impl,precision', [
('torch', 'fp32'),
pytest.param('flash', 'amp_bf16', marks=pytest.mark.gpu),
pytest.param('triton', 'amp_bf16', marks=pytest.mark.gpu),
pytest.param('torch', 'amp_bf16', marks=pytest.mark.gpu),
pytest.param('torch', 'fp32', marks=pytest.mark.gpu),
])
@pytest.mark.parametrize('pos_emb_config', [{
'alibi': False,
@@ -777,25 +771,21 @@ def test_advanced_mask_building(attention_impl: str):
},
}])
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_generate(attention_impl: str, device: str, precision: str,
pos_emb_config: dict, tie_word_embeddings: bool):
def test_generate(attention_impl: str, precision: str, pos_emb_config: dict,
tie_word_embeddings: bool):
# Test that generate works, and produces the same output with or without
# padding in the input.
if not torch.cuda.is_available() and device == 'gpu':
pytest.skip(
f'This test requires CUDA to be available in order to run with {attention_impl} attention.'
)
if pos_emb_config['alibi'] and attention_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')

if pos_emb_config['rope'] and pos_emb_config['rope_impl'] == 'dail' and (
device != 'gpu' or not is_flash_v2_installed()):
if pos_emb_config['rope'] and pos_emb_config[
'rope_impl'] == 'dail' and not is_flash_v2_installed():
pytest.skip(
f'dail implementation of rope requires gpu and flash attention 2.')
if attention_impl == 'torch' and precision == 'amp_bf16' and tie_word_embeddings == False:
pytest.skip(f'This test configuration has precision / sampling issues.')

composer_device = get_device(device)
composer_device = get_device(None)

hf_config = MPTConfig(
init_device='cpu',
@@ -878,8 +868,6 @@ def test_generate(attention_impl: str, device: str, precision: str,
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_generate_with_device_map(tmp_path: pathlib.Path, world_size: int,
use_cache: bool, tie_word_embeddings: bool):
if not torch.cuda.is_available():
pytest.skip(f'This test requires CUDA to be available.')
if not torch.cuda.device_count() >= world_size:
pytest.skip(f'This test requires {world_size} GPUs.')

@@ -978,11 +966,11 @@ def test_save_from_pretrained(tmp_path: pathlib.Path):
check_hf_model_equivalence(mpt, mpt2)


@pytest.mark.parametrize('attn_impl,device', [
('torch', 'cpu'),
('flash', 'gpu'),
('triton', 'gpu'),
('torch', 'gpu'),
@pytest.mark.parametrize('attn_impl', [
'torch',
pytest.param('flash', marks=pytest.mark.gpu),
pytest.param('triton', marks=pytest.mark.gpu),
pytest.param('torch', marks=pytest.mark.gpu),
])
@pytest.mark.parametrize('pos_emb_config', [{
'alibi': False,
@@ -1011,22 +999,17 @@ def test_save_from_pretrained(tmp_path: pathlib.Path):
},
}])
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_forward_with_cache_and_padding(attn_impl: str, device: str,
pos_emb_config: dict,
def test_forward_with_cache_and_padding(attn_impl: str, pos_emb_config: dict,
tie_word_embeddings: bool):
# Tests that the result is the same with or without padding when using kv caching
if not torch.cuda.is_available() and device == 'gpu':
pytest.skip(
f'This test requires CUDA to be available in order to run with {attn_impl} attention.'
)
if pos_emb_config['alibi'] and attn_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')
if pos_emb_config['rope'] and pos_emb_config['rope_impl'] == 'dail' and (
device != 'gpu' or not is_flash_v2_installed()):
if pos_emb_config['rope'] and pos_emb_config[
'rope_impl'] == 'dail' and not is_flash_v2_installed():
pytest.skip(
f'dail implementation of rope requires gpu and flash attention 2.')

composer_device = get_device(device)
composer_device = get_device(None)

hf_config = MPTConfig(
init_device='cpu',
@@ -1120,11 +1103,11 @@ def test_forward_with_cache_and_padding(attn_impl: str, device: str,
rtol=1e-6)


@pytest.mark.parametrize('attn_impl,device', [
('torch', 'cpu'),
('flash', 'gpu'),
('triton', 'gpu'),
('torch', 'gpu'),
@pytest.mark.parametrize('attn_impl', [
'torch',
pytest.param('flash', marks=pytest.mark.gpu),
pytest.param('triton', marks=pytest.mark.gpu),
pytest.param('torch', marks=pytest.mark.gpu),
])
@pytest.mark.parametrize('pos_emb_config', [{
'alibi': False,
@@ -1153,23 +1136,19 @@ def test_forward_with_cache_and_padding(attn_impl: str, device: str,
},
}])
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_forward_with_cache(attn_impl: str, device: str, pos_emb_config: dict,
def test_forward_with_cache(attn_impl: str, pos_emb_config: dict,
tie_word_embeddings: bool):
# Test that model forward with and without the key-value cache produces the
# same output.
if not torch.cuda.is_available() and device == 'gpu':
pytest.skip(
f'This test requires CUDA to be available in order to run with {attn_impl} attention.'
)
if pos_emb_config['alibi'] and attn_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')

if pos_emb_config['rope'] and pos_emb_config['rope_impl'] == 'dail' and (
device != 'gpu' or not is_flash_v2_installed()):
if pos_emb_config['rope'] and pos_emb_config[
'rope_impl'] == 'dail' and not is_flash_v2_installed():
pytest.skip(
f'dail implementation of rope requires gpu and flash attention 2.')

composer_device = get_device(device)
composer_device = get_device(None)

hf_config = MPTConfig(
init_device='cpu',
@@ -1264,11 +1243,11 @@ def test_forward_with_cache(attn_impl: str, device: str, pos_emb_config: dict,
)


@pytest.mark.parametrize('attn_impl,device', [
('torch', 'cpu'),
('flash', 'gpu'),
('triton', 'gpu'),
('torch', 'gpu'),
@pytest.mark.parametrize('attn_impl', [
'torch',
pytest.param('flash', marks=pytest.mark.gpu),
pytest.param('triton', marks=pytest.mark.gpu),
pytest.param('torch', marks=pytest.mark.gpu),
])
@pytest.mark.parametrize('pos_emb_config', [{
'alibi': False,
@@ -1297,20 +1276,16 @@ def test_forward_with_cache(attn_impl: str, device: str, pos_emb_config: dict,
},
}])
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_generate_with_past_kv(attn_impl: str, device: str,
pos_emb_config: dict, tie_word_embeddings: bool):
if not torch.cuda.is_available() and device == 'gpu':
pytest.skip(
f'This test requires CUDA to be available in order to run with {attn_impl} attention.'
)
def test_generate_with_past_kv(attn_impl: str, pos_emb_config: dict,
tie_word_embeddings: bool):
if pos_emb_config['alibi'] and attn_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')
if pos_emb_config['rope'] and pos_emb_config['rope_impl'] == 'dail' and (
device != 'gpu' or not is_flash_v2_installed()):
if pos_emb_config['rope'] and pos_emb_config[
'rope_impl'] == 'dail' and not is_flash_v2_installed():
pytest.skip(
f'dail implementation of rope requires gpu and flash attention 2.')

composer_device = get_device(device)
composer_device = get_device(None)

hf_config = MPTConfig(
init_device='cpu',
@@ -1368,11 +1343,11 @@ def test_generate_with_past_kv(attn_impl: str, device: str,
hf_config.d_model)


@pytest.mark.parametrize('attn_impl,device', [
('torch', 'cpu'),
('flash', 'gpu'),
('triton', 'gpu'),
('torch', 'gpu'),
@pytest.mark.parametrize('attn_impl', [
'torch',
pytest.param('flash', marks=pytest.mark.gpu),
pytest.param('triton', marks=pytest.mark.gpu),
pytest.param('torch', marks=pytest.mark.gpu),
])
@pytest.mark.parametrize('generation_kwargs', [{
'max_new_tokens': 2,
@@ -1412,24 +1387,22 @@ def test_generate_with_past_kv(attn_impl: str, device: str,
},
}])
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_generation_kwargs_dont_crash(attn_impl: str, device: str,
def test_generation_kwargs_dont_crash(attn_impl: str,
generation_kwargs: Dict[str, Any],
pos_emb_config: dict,
tie_word_embeddings: bool):
if not torch.cuda.is_available() and device == 'gpu':
pytest.skip(
f'This test requires CUDA to be available in order to run with {attn_impl} attention.'
)
if pos_emb_config['alibi'] and attn_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')

if pos_emb_config['rope'] and pos_emb_config['rope_impl'] == 'dail' and (
device != 'gpu' or not is_flash_v2_installed()):
if pos_emb_config['rope'] and pos_emb_config[
'rope_impl'] == 'dail' and not is_flash_v2_installed():
pytest.skip(
f'dail implementation of rope requires gpu and flash attention 2.')
composer_device = get_device(device)
if device == 'gpu': # Switch deteminism off
composer_device = get_device(None)

if composer_device.name == 'gpu':
torch.use_deterministic_algorithms(False)

hf_config = MPTConfig(
init_device='cpu',
d_model=128,
@@ -1463,7 +1436,8 @@ def test_generation_kwargs_dont_crash(attn_impl: str, device: str,
_ = mpt.generate(input_ids=no_padding_input_ids,
attention_mask=no_padding_attention_mask,
**generation_kwargs)
if device == 'gpu': # Switch deteminism back on

if composer_device.name == 'gpu':
reproducibility.configure_deterministic_mode()


@@ -1499,10 +1473,6 @@ def test_generation_kwargs_dont_crash(attn_impl: str, device: str,
def test_model_to(attention_impl: str, pos_emb_config: dict,
tie_word_embeddings: bool):
# test that moving the model to diff devices and dtypes in diff ways does not break the model
if not torch.cuda.is_available():
pytest.skip(
f'This test requires CUDA to be available in order to run with {attention_impl} attention.'
)
if pos_emb_config['alibi'] and attention_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')

@@ -1597,11 +1567,11 @@ def test_alibi_vs_hf():
torch.testing.assert_close(alibi_bias_hf, alibi_bias_m)


@pytest.mark.parametrize('attn_impl,device', [
('torch', 'cpu'),
('flash', 'gpu'),
('triton', 'gpu'),
('torch', 'gpu'),
@pytest.mark.parametrize('attn_impl', [
'torch',
pytest.param('flash', marks=pytest.mark.gpu),
pytest.param('triton', marks=pytest.mark.gpu),
pytest.param('torch', marks=pytest.mark.gpu),
])
@pytest.mark.parametrize('pos_emb_config', [{
'alibi': False,
@@ -1633,24 +1603,19 @@ def test_alibi_vs_hf():
@pytest.mark.parametrize('output_hidden_states', [True, False])
@pytest.mark.parametrize('tie_word_embeddings', [True, False])
def test_forward_with_output_attentions_and_output_hidden_states(
attn_impl: str, device: str, pos_emb_config: dict,
output_attentions: bool, output_hidden_states: bool,
tie_word_embeddings: bool):
attn_impl: str, pos_emb_config: dict, output_attentions: bool,
output_hidden_states: bool, tie_word_embeddings: bool):
# Test that model forward with output_attentions_and_output_hidden_states
if not torch.cuda.is_available() and device == 'gpu':
pytest.skip(
f'This test requires CUDA to be available in order to run with {attn_impl} attention.'
)
if pos_emb_config['alibi'] and attn_impl == 'flash':
pytest.skip(f'alibi only implemented with torch and triton attention.')
if output_attentions and attn_impl in ['flash', 'triton']:
pytest.skip(f'output_attentions only implemented with torch attention.')
if pos_emb_config['rope'] and pos_emb_config['rope_impl'] == 'dail' and (
device != 'gpu' or not is_flash_v2_installed()):
if pos_emb_config['rope'] and pos_emb_config[
'rope_impl'] == 'dail' and not is_flash_v2_installed():
pytest.skip(
f'dail implementation of rope requires gpu and flash attention 2.')

composer_device = get_device(device)
composer_device = get_device(None)

n_layers = 2

@@ -1708,8 +1673,6 @@ def test_hf_init(tmp_path: pathlib.Path,
init_device: str,
world_size: int,
batch_size: int = 1):
if not torch.cuda.is_available():
pytest.skip(f'This test requires CUDA to be available.')
if not torch.cuda.device_count() >= world_size:
pytest.skip(f'This test requires {world_size} GPUs.')


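With the device column gone, whether a case needs CUDA is encoded only in the gpu marker, so test selection can happen at collection time instead of via in-test skips. The snippet below is an illustrative sketch, not taken from the commit: the repository presumably registers the marker already (for example through its pytest configuration or a shared plugin), and the invocations in the comments assume pytest's standard -m marker filter.

# conftest.py (illustrative only; the marker may already be registered elsewhere)
def pytest_configure(config):
    config.addinivalue_line(
        'markers', 'gpu: test requires at least one CUDA device')

# Typical invocations:
#   pytest tests/test_model.py -m 'not gpu'   # CPU-only machines deselect GPU cases
#   pytest tests/test_model.py -m gpu         # run only the GPU-marked cases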