Skip to content

Commit

Permalink
Remove `_init_parameters` attribute for GPU ops
Browse files (browse the repository at this point in the history)
  • Loading branch information
Cathy0908 committed Dec 24, 2024
1 parent 949d534 commit 1fb30f8
Show file tree
Hide file tree
Showing 26 changed files with 5 additions and 54 deletions.
30 changes: 4 additions & 26 deletions data_juicer/core/ray_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,23 +119,12 @@ def _run_single_op(self, op):
1) if op.is_batched_op() else 1
if isinstance(op, Mapper):
if op.use_cuda():
try:
init_params = op._init_parameters
except AttributeError:
raise ValueError(
f'This Op[{op._name}] enables CUDA, you should add'
' `_init_parameters` attribute to the Op class by '
'add `self._init_parameters = self.remove_extra_parameters(locals())`' # noqa: E501
' after super().__init__().')
op_args = init_params.pop('args', ())
op_kwargs = init_params.pop('kwargs', {})
op_kwargs.update(init_params)

op_kwargs = op._op_cfg[op._name]
self.data = self.data.map_batches(
op.__class__,
fn_args=None,
fn_kwargs=None,
fn_constructor_args=op_args,
fn_constructor_args=None,
fn_constructor_kwargs=op_kwargs,
batch_size=batch_size,
num_gpus=num_gpus,
Expand All @@ -148,23 +137,12 @@ def _run_single_op(self, op):
num_gpus=num_gpus)
elif isinstance(op, Filter):
if op.use_cuda():
try:
init_params = op._init_parameters
except AttributeError:
raise ValueError(
f'This Op[{op._name}] enables CUDA, you should add'
' `_init_parameters` attribute to the Op class by '
'add `self._init_parameters = self.remove_extra_parameters(locals())`' # noqa: E501
' after super().__init__().')
op_args = init_params.pop('args', ())
op_kwargs = init_params.pop('kwargs', {})
op_kwargs.update(init_params)

op_kwargs = op._op_cfg[op._name]
self.data = self.data.map_batches(
op.__class__,
fn_args=None,
fn_kwargs=None,
fn_constructor_args=op_args,
fn_constructor_args=None,
fn_constructor_kwargs=op_kwargs,
batch_size=batch_size,
num_gpus=num_gpus,
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/image_aesthetics_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,6 @@ def __init__(self,
"""

super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
if hf_scorer_model == '':
hf_scorer_model = \
'shunk031/aesthetics-predictor-v2-sac-logos-ava1-l14-linearMSE'
Expand Down
2 changes: 0 additions & 2 deletions data_juicer/ops/filter/image_nsfw_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,8 +42,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

self.score_threshold = score_threshold
if any_or_all not in ['any', 'all']:
raise ValueError(f'Keep strategy [{any_or_all}] is not supported. '
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/image_pair_similarity_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.min_score = min_score
self.max_score = max_score
if any_or_all not in ['any', 'all']:
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/image_text_matching_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.min_score = min_score
self.max_score = max_score
if reduce_mode not in ['avg', 'max', 'min']:
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/image_text_similarity_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.min_score = min_score
self.max_score = max_score
if reduce_mode not in ['avg', 'max', 'min']:
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/image_watermark_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.prob_threshold = prob_threshold
if any_or_all not in ['any', 'all']:
raise ValueError(f'Keep strategy [{any_or_all}] is not supported. '
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/phrase_grounding_recall_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,7 +115,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.min_recall = min_recall
self.max_recall = max_recall
if reduce_mode not in ['avg', 'max', 'min']:
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/video_aesthetics_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ def __init__(self,
"""

super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
if hf_scorer_model == '':
hf_scorer_model = \
'shunk031/aesthetics-predictor-v2-sac-logos-ava1-l14-linearMSE'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.min_score = min_score
self.max_score = max_score
if frame_sampling_method not in ['all_keyframes', 'uniform']:
Expand Down
2 changes: 0 additions & 2 deletions data_juicer/ops/filter/video_motion_score_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,8 +82,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

self.min_score = min_score
self.max_score = max_score
self.sampling_fps = sampling_fps
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/video_nsfw_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.score_threshold = score_threshold
if frame_sampling_method not in ['all_keyframes', 'uniform']:
raise ValueError(
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/video_ocr_area_ratio_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.min_area_ratio = min_area_ratio
self.max_area_ratio = max_area_ratio
self.frame_sample_num = frame_sample_num
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/video_tagging_from_frames_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
if contain not in ['any', 'all']:
raise ValueError(f'the containing type [{contain}] is not '
f'supported. Can only be one of ["any", "all"].')
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/filter/video_watermark_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,7 +70,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.prob_threshold = prob_threshold
if frame_sampling_method not in ['all_keyframes', 'uniform']:
raise ValueError(
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/mapper/generate_qa_from_examples_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,6 @@ def __init__(self,
:param kwargs: Extra keyword arguments.
"""
super().__init__(**kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

if not seed_file:
raise ValueError(
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/mapper/generate_qa_from_text_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,6 @@ def __init__(self,
"""

super().__init__(**kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

if output_pattern is None:
self.output_pattern = r'Human:(.*?)Assistant:(.*?)(?=Human|$)' # noqa: E501
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/mapper/image_captioning_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

if keep_candidate_mode not in [
'random_any', 'similar_one_simhash', 'all'
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/mapper/image_tagging_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
self.model_key = prepare_model(
model_type='recognizeAnything',
pretrained_model_name_or_path='ram_plus_swin_large_14m.pth',
Expand Down
2 changes: 1 addition & 1 deletion data_juicer/ops/mapper/optimize_qa_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def __init__(self,
:param kwargs: Extra keyword arguments.
"""
super().__init__(**kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

self.system_prompt = system_prompt or self.DEFAULT_SYSTEM_PROMPT
self.input_template = input_template or self.DEFAULT_INPUT_TEMPLATE
self.qa_pair_template = qa_pair_template or \
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@ def __init__(self, keep_original_sample: bool = True, *args, **kwargs):
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
AUTOINSTALL.check([
'transformers', 'transformers_stream_generator', 'einops',
'accelerate', 'tiktoken'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def __init__(
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

if keep_candidate_mode not in [
'random_any', 'similar_one_simhash', 'all'
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
AUTOINSTALL.check([
'torch',
'transformers',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,6 @@ def __init__(
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

if keep_candidate_mode not in [
'random_any', 'similar_one_simhash', 'all'
Expand Down
1 change: 0 additions & 1 deletion data_juicer/ops/mapper/video_tagging_from_audio_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())
AUTOINSTALL.check(['torchaudio'])
self.model_key = prepare_model(model_type='huggingface',
pretrained_model_name_or_path=hf_ast,
Expand Down
2 changes: 0 additions & 2 deletions data_juicer/ops/mapper/video_tagging_from_frames_mapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,6 @@ def __init__(self,
:param kwargs: extra args
"""
super().__init__(*args, **kwargs)
self._init_parameters = self.remove_extra_parameters(locals())

if frame_sampling_method not in ['all_keyframes', 'uniform']:
raise ValueError(
f'Frame sampling method [{frame_sampling_method}] is not '
Expand Down

0 comments on commit 1fb30f8

Please sign in to comment.