diff --git a/data_juicer/ops/aggregator/meta_tags_aggregator.py b/data_juicer/ops/aggregator/meta_tags_aggregator.py index 454602657..a24025392 100644 --- a/data_juicer/ops/aggregator/meta_tags_aggregator.py +++ b/data_juicer/ops/aggregator/meta_tags_aggregator.py @@ -75,7 +75,7 @@ class MetaTagsAggregator(Aggregator): '{tag_strs}') DEFAULT_TAG_TEMPLATE = '| {tag} | {cnt} |' - DEFAULT_OUTPUT_PATTERN = r'\n(.*?)归类为(.*?)(\Z|\n)' + DEFAULT_OUTPUT_PATTERN = r'\n(\w+)归类为(\w+)($|\n)' def __init__(self, api_model: str = 'gpt-4o',