Skip to content

Commit

Permalink
fix facebook/opt-125m not working issue (#2824)
Browse files (browse the repository at this point in the history)
Signed-off-by: Wang, Yi A <[email protected]>
  • Loading branch information
sywangyi authored Dec 12, 2024
1 parent c3bd721 commit bf59118
Showing 1 changed file with 4 additions and 2 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -99,7 +99,7 @@ def __init__(self, prefix: str, weights):
self.offset = 2
self.weight = nn.Parameter(
weights.get_tensor(
- f"{prefix + '.' if prefix else ''}decoder.embed_positions.weight"
+ f"{prefix if prefix else ''}decoder.embed_positions.weight"
)
)

Expand Down Expand Up @@ -317,7 +317,7 @@ def __init__(self, layer_id: int, prefix: str, config: OPTConfig, weights):
super().__init__()
self.process_group = weights.process_group
self.hidden_size = config.hidden_size
- prefix = f"{prefix + '.' if prefix else ''}decoder.layers.{layer_id}"
+ prefix = f"{prefix if prefix else ''}decoder.layers.{layer_id}"
self.self_attn = OPTAttention(
config,
prefix=f"{prefix}.self_attn",
Expand Down Expand Up @@ -755,6 +755,8 @@ def forward(
class OPTForCausalLM(OPTPreTrainedModel):
def __init__(self, prefix, config, weights):
super().__init__(config)
+ if not prefix and any(s.startswith("model") for s in weights.routing.keys()):
+ prefix = "model"

self.model = OPTModel(prefix, config, weights)

Expand Down

0 comments on commit bf59118

Please sign in to comment.