Skip to content

Commit

Permalink
PR comments
Browse files: browse the repository at this point in the history
  • Loading branch information
Tamar Grey committed Nov 16, 2022
1 parent fe83e96 commit 4cb94c5
Show file tree
Hide file tree
Showing 5 changed files with 24 additions and 14 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ def _get_cloned_feature_pipelines(
new_parameters = pipeline.parameters
for component in pipeline.component_graph.component_instances.values():
new_parameters = component._handle_partial_dependence_fast_mode(
X_train,
new_parameters,
X=X_train,
target=pipeline.input_target_name,
)

Expand Down
11 changes: 8 additions & 3 deletions evalml/pipelines/components/component_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,14 +103,19 @@ def default_parameters(cls):
def _supported_by_list_API(cls):
return not cls.modifies_target

def _handle_partial_dependence_fast_mode(self, X, pipeline_parameters, target):
def _handle_partial_dependence_fast_mode(
self,
pipeline_parameters,
X=None,
target=None,
):
"""Determines whether or not a component can be used with partial dependence's fast mode.
Args:
X (pd.DataFrame): Holdout data being used for partial dependence calculations.
pipeline_parameters (dict): Pipeline parameters that will be used to create the pipelines
used in partial dependence fast mode.
target (str): The target whose values we are trying to predict.
X (pd.DataFrame, optional): Holdout data being used for partial dependence calculations.
target (str, optional): The target whose values we are trying to predict.
"""
if self._can_be_used_for_fast_partial_dependence:
return pipeline_parameters
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,12 @@ def fit_transform(self, X, y=None):
"""
return self.fit(X, y).transform(X, y)

def _handle_partial_dependence_fast_mode(self, X, pipeline_parameters, target):
def _handle_partial_dependence_fast_mode(
self,
pipeline_parameters,
X=None,
target=None,
):
"""Updates pipeline parameters to not drop any features based off of feature importance.
This is needed, because fast mode refits cloned pipelines on single columns,
Expand All @@ -81,10 +86,10 @@ def _handle_partial_dependence_fast_mode(self, X, pipeline_parameters, target):
pipeline to determine if that feature gets dropped or not.
Args:
X (pd.DataFrame): Holdout data being used for partial dependence calculations.
pipeline_parameters (dict): Pipeline parameters that will be used to create the pipelines
used in partial dependence fast mode.
target (str): The target whose values we are trying to predict.
X (pd.DataFrame, optional): Holdout data being used for partial dependence calculations.
target (str, optional): The target whose values we are trying to predict.
Returns:
pipeline_parameters (dict): Pipeline parameters updated to allow the FeatureSelector component
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -129,21 +129,21 @@ def transform(self, X, y=None):
feature_matrix.ww.init(schema=partial_schema)
return feature_matrix

def _handle_partial_dependence_fast_mode(self, X, pipeline_parameters, target):
"""Determines whether or not a DFSTransformer component can be used with partial dependence's fast mode.
def _handle_partial_dependence_fast_mode(self, pipeline_parameters, X, target):
"""Determines whether or not a DFS Transformer component can be used with partial dependence's fast mode.
Note:
This component can be used with partial dependence fast mode only when
all of the features present in the ``features`` parameter are present
in the DataFrame.
Args:
X (pd.DataFrame): Holdout data being used for partial dependence calculations.
pipeline_parameters (dict): Pipeline parameters that will be used to create the pipelines
used in partial dependence fast mode.
target (str): The target whose values we are trying to predict. May be present in the
list of features in the DFS Transformer's parameters, in which case we should ignore it.
X (pd.DataFrame): Holdout data being used for partial dependence calculations.
target (str): The target whose values we are trying to predict. This is used
to know which column to ignore if the target column is present in the list of features
in the DFS Transformer's parameters.
"""
dfs_transformer = pipeline_parameters.get("DFS Transformer")
if dfs_transformer is not None:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2871,7 +2871,7 @@ def test_partial_dependence_dfs_transformer_handling_with_multi_output_primitive
pipeline = RegressionPipeline(
[dfs_transformer, "Standard Scaler", "Random Forest Regressor"],
)
# Confirm that the LSA primitive was actually used
# Confirm that a multi-output feature is present
assert any(len(f.get_feature_names()) > 1 for f in features)

pipeline.fit(X_fm, y)
Expand Down

0 comments on commit 4cb94c5

Please sign in to comment.