Skip to content

Commit

Permalink
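Use sharegpt from prompt strategies now (removes the hard-coded `sharegpt` branch from `src/axolotl/utils/data.py`; `load` now accepts an optional `ds_cfg` carrying the `conversation` setting)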
Browse files — browse the repository at this point in the history
  • Loading branch information
winglian committed Sep 25, 2023
1 parent 7922664 commit 700ef36
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 13 deletions.
Original file line number | Diff line number | Diff line change
@@ -1,12 +1,16 @@
"""Module containing the SimpleShareGPTPromptTokenizingStrategy class"""
from typing import Any, Dict, Optional

from axolotl.prompt_tokenizers import ShareGPTPromptTokenizingStrategy
from axolotl.prompters import ShareGPTPrompterV2


def load(tokenizer, cfg):
def load(tokenizer, cfg, ds_cfg: Optional[Dict[str, Any]] = None):
conversation = (
ds_cfg["conversation"] if ds_cfg and "conversation" in ds_cfg else None
)
return SimpleShareGPTPromptTokenizingStrategy(
ShareGPTPrompterV2(),
ShareGPTPrompterV2(conversation=conversation),
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
Expand Down
11 changes: 0 additions & 11 deletions src/axolotl/utils/data.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -25,7 +25,6 @@
GPTeacherPromptTokenizingStrategy,
JeopardyPromptTokenizingStrategy,
OpenAssistantPromptTokenizingStrategy,
ShareGPTPromptTokenizingStrategy,
SummarizeTLDRPromptTokenizingStrategy,
)
from axolotl.prompters import (
Expand All @@ -35,7 +34,6 @@
MultipleChoiceConcisePrompter,
MultipleChoiceExplainPrompter,
ReflectAlpacaPrompter,
ShareGPTPrompterV2,
SummarizeTLDRPrompter,
)
from axolotl.utils.dict import DictDefault
Expand Down Expand Up @@ -320,15 +318,6 @@ def for_d_in_datasets(dataset_configs):
)
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds)
datasets.append(ds_wrapper)
elif d_base_type == "sharegpt":
ds_strategy = ShareGPTPromptTokenizingStrategy(
ShareGPTPrompterV2(d.conversation),
tokenizer,
cfg.train_on_inputs,
cfg.sequence_len,
)
ds_wrapper = TokenizedPromptDataset(ds_strategy, ds)
datasets.append(ds_wrapper)
else:
suffix = ""
if ":load_" in d.type:
Expand Down

0 comments on commit 700ef36

Please sign in to comment.