From caf001a0cc26b88e7d872fa5b45e1bcb2988b155 Mon Sep 17 00:00:00 2001 From: semio Date: Wed, 15 Mar 2017 10:20:29 +0800 Subject: [PATCH 1/4] bug fix for translate_column when not_found == 'include', we should keep the original value if there is one, instead of inserting NaN for all. --- ddf_utils/transformer.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/ddf_utils/transformer.py b/ddf_utils/transformer.py index 051ef8f..e6b5ed3 100644 --- a/ddf_utils/transformer.py +++ b/ddf_utils/transformer.py @@ -40,8 +40,14 @@ def _translate_column_inline(df, column, target_column, dictionary, df_new[target_column] = df_new[column].map( lambda x: dictionary[x]) if not_found == 'include': - df_new[target_column] = df_new[column].map( - lambda x: dictionary[x] if x in dictionary.keys() else x) + if target_column not in df_new.columns: + # create a new column: if a key not in the mappings, return NaN + df_new[target_column] = df_new[column].map( + lambda x: dictionary[x] if x in dictionary.keys() else np.nan) + else: # if a key not in the mappings, use the original value + for i, x in df_new[column].iteritems(): + if x in dictionary.keys(): + df_new.ix[i, target_column] = dictionary[x] return df_new From 63731f810d81b1dca4455b5a53b47ed757442283 Mon Sep 17 00:00:00 2001 From: semio Date: Wed, 15 Mar 2017 10:21:54 +0800 Subject: [PATCH 2/4] bug fix for trend_bridge don't run bridge twice when there is no bridge end data! 
--- ddf_utils/chef/procedure.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/ddf_utils/chef/procedure.py b/ddf_utils/chef/procedure.py index aafd266..2ae52f7 100644 --- a/ddf_utils/chef/procedure.py +++ b/ddf_utils/chef/procedure.py @@ -944,14 +944,14 @@ def trend_bridge(ingredient: BaseIngredient, bridge_start, bridge_end, bridge_le # calculate trend bridge on each group res_grouped = [] for g, df in start_group: - gstart = df + gstart = df.copy() try: gend = end_group.get_group(g) except KeyError: # no new data available for this group - logger.warning("no data for group: " + g) + logger.warning("no data for bridge end: " + g) bridged = gstart[bridge_start['column']] - - bridged = tb(gstart[bridge_start['column']], gend[bridge_end['column']], bridge_length) + else: + bridged = tb(gstart[bridge_start['column']], gend[bridge_end['column']], bridge_length) res_grouped.append((g, bridged)) @@ -976,4 +976,3 @@ def trend_bridge(ingredient: BaseIngredient, bridge_start, bridge_end, bridge_le return ProcedureResult(result, start.key, merged) else: return ProcedureResult(result, start.key, {target_col: result_data}) - From 822381605a77c2345dfd1c0e8c43e9f4c743211c Mon Sep 17 00:00:00 2001 From: semio Date: Thu, 16 Mar 2017 15:02:53 +0800 Subject: [PATCH 3/4] improve dependency checking closes #53 --- ddf_utils/chef/cook.py | 4 ++++ ddf_utils/chef/dag.py | 20 ++++++++++++++++++-- 2 files changed, 22 insertions(+), 2 deletions(-) diff --git a/ddf_utils/chef/cook.py b/ddf_utils/chef/cook.py index fe56119..556cdd0 100644 --- a/ddf_utils/chef/cook.py +++ b/ddf_utils/chef/cook.py @@ -323,6 +323,10 @@ def run_recipe(recipe, serve=False, outpath=None): # create DAG of recipe dag = build_dag(recipe_) + # check all ingredients availability + for root in dag.roots: + root.detect_missing_dependency() + # now run the recipe dishes = get_dishes(recipe_) diff --git a/ddf_utils/chef/dag.py b/ddf_utils/chef/dag.py index 2a1d6fb..2a31d37 100644 --- 
a/ddf_utils/chef/dag.py +++ b/ddf_utils/chef/dag.py @@ -73,6 +73,22 @@ def detect_downstream_cycle(self, node=None): t.detect_downstream_cycle(node=node) return False + def detect_missing_dependency(self): + """ + check if every upstream is available in the DAG. + raise error if something is missing + """ + not_found = set() + for n in self.upstream_list: + if not self.dag.has_node(n.node_id): + not_found.add(n.node_id) + if isinstance(n, ProcedureNode) and not n.procedure: + not_found.add(n.node_id) + if len(not_found) > 0: + raise ChefRuntimeError( + "dependency not found/not definded for {}: {}".format(self.node_id, not_found)) + return False + class IngredientNode(BaseNode): """Node for storing dataset ingredients. @@ -116,8 +132,8 @@ def evaluate(self): func = getattr(pc, self.procedure['procedure']) except AttributeError: raise ProcedureError("Not supported: " + self.procedure['procedure']) - except TypeError: - raise ProcedureError("Procedure Error: " + str(self.node_id)) + # except TypeError: + # raise ProcedureError("Procedure Error: " + str(self.node_id)) # check the base ingredients and convert the string id to actual ingredient ingredients = [] From e26caa0254d42e9b0777a49d61b2b9d24e119dc6 Mon Sep 17 00:00:00 2001 From: semio Date: Thu, 16 Mar 2017 15:06:36 +0800 Subject: [PATCH 4/4] add test for invalid recipe --- tests/recipes_fail/test_trend_bridge.yml | 39 ++++++++++++++++++ .../{recipes => recipes_pass}/test_copy.yaml | 0 .../test_debug_option.yaml | 0 .../test_extract_concepts.yaml | 0 .../test_filter_item.yaml | 0 .../test_filter_row.yml | 0 .../test_groupby.yaml | 0 .../test_ingredients.yaml | 0 .../{recipes => recipes_pass}/test_merge.yaml | 0 .../test_run_op.yaml | 0 .../test_serve_procedure.yaml | 0 .../test_serving_section.yaml | 0 .../test_translate_column.yaml | 0 .../test_translate_header.yaml | 0 .../test_trend_bridge.yml | 0 .../test_window.yaml | 0 tests/test_recipe.py | 41 +++++++++++++++---- 17 files changed, 71 insertions(+), 9 
deletions(-) create mode 100644 tests/recipes_fail/test_trend_bridge.yml rename tests/{recipes => recipes_pass}/test_copy.yaml (100%) rename tests/{recipes => recipes_pass}/test_debug_option.yaml (100%) rename tests/{recipes => recipes_pass}/test_extract_concepts.yaml (100%) rename tests/{recipes => recipes_pass}/test_filter_item.yaml (100%) rename tests/{recipes => recipes_pass}/test_filter_row.yml (100%) rename tests/{recipes => recipes_pass}/test_groupby.yaml (100%) rename tests/{recipes => recipes_pass}/test_ingredients.yaml (100%) rename tests/{recipes => recipes_pass}/test_merge.yaml (100%) rename tests/{recipes => recipes_pass}/test_run_op.yaml (100%) rename tests/{recipes => recipes_pass}/test_serve_procedure.yaml (100%) rename tests/{recipes => recipes_pass}/test_serving_section.yaml (100%) rename tests/{recipes => recipes_pass}/test_translate_column.yaml (100%) rename tests/{recipes => recipes_pass}/test_translate_header.yaml (100%) rename tests/{recipes => recipes_pass}/test_trend_bridge.yml (100%) rename tests/{recipes => recipes_pass}/test_window.yaml (100%) diff --git a/tests/recipes_fail/test_trend_bridge.yml b/tests/recipes_fail/test_trend_bridge.yml new file mode 100644 index 0000000..01132a9 --- /dev/null +++ b/tests/recipes_fail/test_trend_bridge.yml @@ -0,0 +1,39 @@ +info: + id: test-trend_bridge + +ingredients: + - id: cme1 + dataset: ddf--cme + key: country, year + value: '*' + - id: cme2 + dataset: ddf--cme + key: country, year + value: '*' + +cooking: + datapoints: + - procedure: filter_row + ingredients: + - cme1 + options: + dictionary: + new_col: + from: imr_median + year: [2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, + 2009, 2010, 2011, 2012, 2013] + result: cme3 + - procedure: trend_bridge + ingredients: + - cme1 + options: + bridge_start: + column: imr_lower + bridge_end: + ingredient: cme1231 + column: new_col + bridge_length: 10 + bridge_on: year + target_col: bridged + result: res + diff --git a/tests/recipes/test_copy.yaml 
b/tests/recipes_pass/test_copy.yaml similarity index 100% rename from tests/recipes/test_copy.yaml rename to tests/recipes_pass/test_copy.yaml diff --git a/tests/recipes/test_debug_option.yaml b/tests/recipes_pass/test_debug_option.yaml similarity index 100% rename from tests/recipes/test_debug_option.yaml rename to tests/recipes_pass/test_debug_option.yaml diff --git a/tests/recipes/test_extract_concepts.yaml b/tests/recipes_pass/test_extract_concepts.yaml similarity index 100% rename from tests/recipes/test_extract_concepts.yaml rename to tests/recipes_pass/test_extract_concepts.yaml diff --git a/tests/recipes/test_filter_item.yaml b/tests/recipes_pass/test_filter_item.yaml similarity index 100% rename from tests/recipes/test_filter_item.yaml rename to tests/recipes_pass/test_filter_item.yaml diff --git a/tests/recipes/test_filter_row.yml b/tests/recipes_pass/test_filter_row.yml similarity index 100% rename from tests/recipes/test_filter_row.yml rename to tests/recipes_pass/test_filter_row.yml diff --git a/tests/recipes/test_groupby.yaml b/tests/recipes_pass/test_groupby.yaml similarity index 100% rename from tests/recipes/test_groupby.yaml rename to tests/recipes_pass/test_groupby.yaml diff --git a/tests/recipes/test_ingredients.yaml b/tests/recipes_pass/test_ingredients.yaml similarity index 100% rename from tests/recipes/test_ingredients.yaml rename to tests/recipes_pass/test_ingredients.yaml diff --git a/tests/recipes/test_merge.yaml b/tests/recipes_pass/test_merge.yaml similarity index 100% rename from tests/recipes/test_merge.yaml rename to tests/recipes_pass/test_merge.yaml diff --git a/tests/recipes/test_run_op.yaml b/tests/recipes_pass/test_run_op.yaml similarity index 100% rename from tests/recipes/test_run_op.yaml rename to tests/recipes_pass/test_run_op.yaml diff --git a/tests/recipes/test_serve_procedure.yaml b/tests/recipes_pass/test_serve_procedure.yaml similarity index 100% rename from tests/recipes/test_serve_procedure.yaml rename to 
tests/recipes_pass/test_serve_procedure.yaml diff --git a/tests/recipes/test_serving_section.yaml b/tests/recipes_pass/test_serving_section.yaml similarity index 100% rename from tests/recipes/test_serving_section.yaml rename to tests/recipes_pass/test_serving_section.yaml diff --git a/tests/recipes/test_translate_column.yaml b/tests/recipes_pass/test_translate_column.yaml similarity index 100% rename from tests/recipes/test_translate_column.yaml rename to tests/recipes_pass/test_translate_column.yaml diff --git a/tests/recipes/test_translate_header.yaml b/tests/recipes_pass/test_translate_header.yaml similarity index 100% rename from tests/recipes/test_translate_header.yaml rename to tests/recipes_pass/test_translate_header.yaml diff --git a/tests/recipes/test_trend_bridge.yml b/tests/recipes_pass/test_trend_bridge.yml similarity index 100% rename from tests/recipes/test_trend_bridge.yml rename to tests/recipes_pass/test_trend_bridge.yml diff --git a/tests/recipes/test_window.yaml b/tests/recipes_pass/test_window.yaml similarity index 100% rename from tests/recipes/test_window.yaml rename to tests/recipes_pass/test_window.yaml diff --git a/tests/test_recipe.py b/tests/test_recipe.py index b9676f3..08e0677 100644 --- a/tests/test_recipe.py +++ b/tests/test_recipe.py @@ -11,21 +11,44 @@ import pytest import glob -all_test_recipes = glob.glob('recipes/test_*') +test_recipes_pass = glob.glob('recipes_pass/test_*') +test_recipes_fail = glob.glob('recipes_fail/test_*') + + +@pytest.fixture(scope='session', + params=test_recipes_pass) +def recipe_file_pass(request): + return request.param + @pytest.fixture(scope='session', - params=all_test_recipes) -def recipe_file(request): + params=test_recipes_fail) +def recipe_file_fail(request): return request.param -def test_run_recipe(recipe_file, to_disk=False): - print('running test: ' + recipe_file) - recipe = chef.build_recipe(recipe_file) +def test_run_recipe_pass(recipe_file_pass, to_disk=False): + print('running test: ' + 
recipe_file_pass) + recipe = chef.build_recipe(recipe_file_pass) + if to_disk: + outdir = tempfile.mkdtemp() + print('tmpdir: ' + outdir) + else: + outdir = None + + chef.run_recipe(recipe, to_disk, outdir) + + +def test_run_recipe_fail(recipe_file_fail, to_disk=False): + print('running test: ' + recipe_file_fail) + recipe = chef.build_recipe(recipe_file_fail) if to_disk: outdir = tempfile.mkdtemp() print('tmpdir: ' + outdir) - chef.run_recipe(recipe, True, outdir) else: - _ = chef.run_recipe(recipe) - assert 1 + outdir = None + try: + chef.run_recipe(recipe, to_disk, outdir) + except: + return + assert 0