From a9977262e4a51a5a76b0c26a085c74bc9b1f052b Mon Sep 17 00:00:00 2001 From: Haibin <1400012807@pku.edu.cn> Date: Thu, 19 Dec 2024 16:14:19 +0800 Subject: [PATCH] meta tags aggregator --- data_juicer/ops/aggregator/meta_tags_aggregator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/data_juicer/ops/aggregator/meta_tags_aggregator.py b/data_juicer/ops/aggregator/meta_tags_aggregator.py index 454602657..a24025392 100644 --- a/data_juicer/ops/aggregator/meta_tags_aggregator.py +++ b/data_juicer/ops/aggregator/meta_tags_aggregator.py @@ -75,7 +75,7 @@ class MetaTagsAggregator(Aggregator): '{tag_strs}') DEFAULT_TAG_TEMPLATE = '| {tag} | {cnt} |' - DEFAULT_OUTPUT_PATTERN = r'\n(.*?)归类为(.*?)(\Z|\n)' + DEFAULT_OUTPUT_PATTERN = r'\n(\w+)归类为(\w+)($|\n)' def __init__(self, api_model: str = 'gpt-4o',