From 82f086bf74a77ec4f8759c820d53d10a73bcecf8 Mon Sep 17 00:00:00 2001 From: dave-connors-3 <73915542+dave-connors-3@users.noreply.github.com> Date: Fri, 18 Aug 2023 14:01:08 -0500 Subject: [PATCH 1/6] update `Number` class to handle integer values (#8306) * add show test for json data * oh changie my changie * revert unecessary cahnge to fixture * keep decimal class for precision methods, but return __int__ value * jerco updates * update integer type * update other tests * Update .changes/unreleased/Fixes-20230803-093502.yaml --------- Co-authored-by: Emily Rockman --- .../unreleased/Fixes-20230803-093502.yaml | 6 ++++ core/dbt/clients/agate_helper.py | 12 +++++++ tests/functional/show/fixtures.py | 8 +++++ tests/functional/show/test_show.py | 35 ++++++++----------- tests/unit/test_agate_helper.py | 20 +++++------ 5 files changed, 51 insertions(+), 30 deletions(-) create mode 100644 .changes/unreleased/Fixes-20230803-093502.yaml diff --git a/.changes/unreleased/Fixes-20230803-093502.yaml b/.changes/unreleased/Fixes-20230803-093502.yaml new file mode 100644 index 00000000000..ffa0b3aba1c --- /dev/null +++ b/.changes/unreleased/Fixes-20230803-093502.yaml @@ -0,0 +1,6 @@ +kind: Fixes +body: Add explicit support for integers for the show command +time: 2023-08-03T09:35:02.163968-05:00 +custom: + Author: dave-connors-3 + Issue: "8153" diff --git a/core/dbt/clients/agate_helper.py b/core/dbt/clients/agate_helper.py index 642ade4e619..273d9c51923 100644 --- a/core/dbt/clients/agate_helper.py +++ b/core/dbt/clients/agate_helper.py @@ -12,6 +12,17 @@ BOM = BOM_UTF8.decode("utf-8") # '\ufeff' +class Integer(agate.data_types.DataType): + def cast(self, d): + if type(d) == int: + return d + else: + raise agate.exceptions.CastError('Can not parse value "%s" as Integer.' % d) + + def jsonify(self, d): + return d + + class Number(agate.data_types.Number): # undo the change in https://github.com/wireservice/agate/pull/733 # i.e. do not cast True and False to numeric 1 and 0 @@ -47,6 +58,7 @@ def build_type_tester( ) -> agate.TypeTester: types = [ + Integer(null_values=("null", "")), Number(null_values=("null", "")), agate.data_types.Date(null_values=("null", ""), date_format="%Y-%m-%d"), agate.data_types.DateTime(null_values=("null", ""), datetime_format="%Y-%m-%d %H:%M:%S"), diff --git a/tests/functional/show/fixtures.py b/tests/functional/show/fixtures.py index 85bfcd26c29..578c6b5f096 100644 --- a/tests/functional/show/fixtures.py +++ b/tests/functional/show/fixtures.py @@ -2,6 +2,14 @@ select * from {{ ref('sample_seed') }} """ +models__sample_number_model = """ +select + cast(1.0 as int) as float_to_int_field, + 3.0 as float_field, + 4.3 as float_with_dec_field, + 5 as int_field +""" + models__second_model = """ select sample_num as col_one, diff --git a/tests/functional/show/test_show.py b/tests/functional/show/test_show.py index 13e31f74543..a8db41d26a0 100644 --- a/tests/functional/show/test_show.py +++ b/tests/functional/show/test_show.py @@ -6,6 +6,7 @@ models__second_ephemeral_model, seeds__sample_seed, models__sample_model, + models__sample_number_model, models__second_model, models__ephemeral_model, schema_yml, @@ -14,11 +15,12 @@ ) -class BaseTestShow: +class TestShow: @pytest.fixture(scope="class") def models(self): return { "sample_model.sql": models__sample_model, + "sample_number_model.sql": models__sample_number_model, "second_model.sql": models__second_model, "ephemeral_model.sql": models__ephemeral_model, "sql_header.sql": models__sql_header, @@ -28,8 +30,6 @@ def models(self): def seeds(self): return {"sample_seed.csv": seeds__sample_seed} - -class TestNone(BaseTestShow): def test_none(self, project): with pytest.raises( DbtRuntimeError, match="Either --select or --inline must be passed to show" @@ -37,8 +37,6 @@ def test_none(self, project): run_dbt(["seed"]) run_dbt(["show"]) - -class TestSelectModelText(BaseTestShow): def test_select_model_text(self, project): run_dbt(["build"]) (results, log_output) = run_dbt_and_capture(["show", "--select", "second_model"]) @@ -48,8 +46,6 @@ def test_select_model_text(self, project): assert "col_two" in log_output assert "answer" in log_output - -class TestSelectMultModelText(BaseTestShow): def test_select_multiple_model_text(self, project): run_dbt(["build"]) (results, log_output) = run_dbt_and_capture( @@ -59,8 +55,6 @@ def test_select_multiple_model_text(self, project): assert "sample_num" in log_output assert "sample_bool" in log_output - -class TestSelectSingleMultModelJson(BaseTestShow): def test_select_single_model_json(self, project): run_dbt(["build"]) (results, log_output) = run_dbt_and_capture( @@ -70,8 +64,19 @@ def test_select_single_model_json(self, project): assert "sample_num" in log_output assert "sample_bool" in log_output + def test_numeric_values(self, project): + run_dbt(["build"]) + (results, log_output) = run_dbt_and_capture( + ["show", "--select", "sample_number_model", "--output", "json"] + ) + assert "Previewing node 'sample_number_model'" not in log_output + assert "1.0" not in log_output + assert "1" in log_output + assert "3.0" in log_output + assert "4.3" in log_output + assert "5" in log_output + assert "5.0" not in log_output -class TestInlinePass(BaseTestShow): def test_inline_pass(self, project): run_dbt(["build"]) (results, log_output) = run_dbt_and_capture( @@ -81,8 +86,6 @@ def test_inline_pass(self, project): assert "sample_num" in log_output assert "sample_bool" in log_output - -class TestShowExceptions(BaseTestShow): def test_inline_fail(self, project): with pytest.raises(DbtException, match="Error parsing inline query"): run_dbt(["show", "--inline", "select * from {{ ref('third_model') }}"]) @@ -91,8 +94,6 @@ def test_inline_fail_database_error(self, project): with pytest.raises(DbtRuntimeError, match="Database Error"): run_dbt(["show", "--inline", "slect asdlkjfsld;j"]) - -class TestEphemeralModels(BaseTestShow): def test_ephemeral_model(self, project): run_dbt(["build"]) (results, log_output) = run_dbt_and_capture(["show", "--select", "ephemeral_model"]) @@ -105,8 +106,6 @@ def test_second_ephemeral_model(self, project): ) assert "col_hundo" in log_output - -class TestLimit(BaseTestShow): @pytest.mark.parametrize( "args,expected", [ @@ -121,14 +120,10 @@ def test_limit(self, project, args, expected): results, log_output = run_dbt_and_capture(dbt_args) assert len(results.results[0].agate_table) == expected - -class TestSeed(BaseTestShow): def test_seed(self, project): (results, log_output) = run_dbt_and_capture(["show", "--select", "sample_seed"]) assert "Previewing node 'sample_seed'" in log_output - -class TestSqlHeader(BaseTestShow): def test_sql_header(self, project): run_dbt(["build"]) (results, log_output) = run_dbt_and_capture(["show", "--select", "sql_header"]) diff --git a/tests/unit/test_agate_helper.py b/tests/unit/test_agate_helper.py index a2d9fcdb0e2..87658633158 100644 --- a/tests/unit/test_agate_helper.py +++ b/tests/unit/test_agate_helper.py @@ -121,37 +121,37 @@ def test_datetime_formats(self): self.assertEqual(tbl[0][0], expected) def test_merge_allnull(self): - t1 = agate.Table([(1, "a", None), (2, "b", None)], ("a", "b", "c")) - t2 = agate.Table([(3, "c", None), (4, "d", None)], ("a", "b", "c")) + t1 = agate_helper.table_from_rows([(1, "a", None), (2, "b", None)], ("a", "b", "c")) + t2 = agate_helper.table_from_rows([(3, "c", None), (4, "d", None)], ("a", "b", "c")) result = agate_helper.merge_tables([t1, t2]) self.assertEqual(result.column_names, ("a", "b", "c")) - assert isinstance(result.column_types[0], agate.data_types.Number) + assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) assert isinstance(result.column_types[2], agate.data_types.Number) self.assertEqual(len(result), 4) def test_merge_mixed(self): - t1 = agate.Table([(1, "a", None), (2, "b", None)], ("a", "b", "c")) - t2 = agate.Table([(3, "c", "dog"), (4, "d", "cat")], ("a", "b", "c")) - t3 = agate.Table([(3, "c", None), (4, "d", None)], ("a", "b", "c")) + t1 = agate_helper.table_from_rows([(1, "a", None), (2, "b", None)], ("a", "b", "c")) + t2 = agate_helper.table_from_rows([(3, "c", "dog"), (4, "d", "cat")], ("a", "b", "c")) + t3 = agate_helper.table_from_rows([(3, "c", None), (4, "d", None)], ("a", "b", "c")) result = agate_helper.merge_tables([t1, t2]) self.assertEqual(result.column_names, ("a", "b", "c")) - assert isinstance(result.column_types[0], agate.data_types.Number) + assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) assert isinstance(result.column_types[2], agate.data_types.Text) self.assertEqual(len(result), 4) result = agate_helper.merge_tables([t2, t3]) self.assertEqual(result.column_names, ("a", "b", "c")) - assert isinstance(result.column_types[0], agate.data_types.Number) + assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) assert isinstance(result.column_types[2], agate.data_types.Text) self.assertEqual(len(result), 4) result = agate_helper.merge_tables([t1, t2, t3]) self.assertEqual(result.column_names, ("a", "b", "c")) - assert isinstance(result.column_types[0], agate.data_types.Number) + assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) assert isinstance(result.column_types[2], agate.data_types.Text) self.assertEqual(len(result), 6) @@ -191,7 +191,7 @@ def test_nocast_bool_01(self): self.assertEqual(len(tbl), len(result_set)) assert isinstance(tbl.column_types[0], agate.data_types.Boolean) - assert isinstance(tbl.column_types[1], agate.data_types.Number) + assert isinstance(tbl.column_types[1], agate_helper.Integer) expected = [ [True, Decimal(1)], From 8d0253668641cd29bfb9fc698bdf107f5f88da9a Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Tue, 5 Sep 2023 20:32:44 -0500 Subject: [PATCH 2/6] account for integer vs number on table merges --- core/dbt/clients/agate_helper.py | 7 +++++++ tests/unit/test_agate_helper.py | 29 +++++++++++++++++++++++------ 2 files changed, 30 insertions(+), 6 deletions(-) diff --git a/core/dbt/clients/agate_helper.py b/core/dbt/clients/agate_helper.py index 273d9c51923..80b01e99486 100644 --- a/core/dbt/clients/agate_helper.py +++ b/core/dbt/clients/agate_helper.py @@ -177,6 +177,13 @@ def __setitem__(self, key, value): elif isinstance(value, _NullMarker): # use the existing value return + # when one table column is Number while another is Integer, let the column become Number on merge + elif isinstance(value, Integer) and isinstance(existing_type, agate.data_types.Number): + # use the existing value + return + elif isinstance(existing_type, Integer) and isinstance(value, agate.data_types.Number): + # overwrite + super().__setitem__(key, value) elif not isinstance(value, type(existing_type)): # actual type mismatch! raise DbtRuntimeError( diff --git a/tests/unit/test_agate_helper.py b/tests/unit/test_agate_helper.py index 87658633158..a4aed5c3e14 100644 --- a/tests/unit/test_agate_helper.py +++ b/tests/unit/test_agate_helper.py @@ -131,29 +131,46 @@ def test_merge_allnull(self): self.assertEqual(len(result), 4) def test_merge_mixed(self): - t1 = agate_helper.table_from_rows([(1, "a", None), (2, "b", None)], ("a", "b", "c")) - t2 = agate_helper.table_from_rows([(3, "c", "dog"), (4, "d", "cat")], ("a", "b", "c")) - t3 = agate_helper.table_from_rows([(3, "c", None), (4, "d", None)], ("a", "b", "c")) + t1 = agate_helper.table_from_rows( + [(1, "a", None, None), (2, "b", None, None)], ("a", "b", "c", "d") + ) + t2 = agate_helper.table_from_rows( + [(3, "c", "dog", 1), (4, "d", "cat", 5)], ("a", "b", "c", "d") + ) + t3 = agate_helper.table_from_rows( + [(3, "c", None, 1.5), (4, "d", None, 3.5)], ("a", "b", "c", "d") + ) result = agate_helper.merge_tables([t1, t2]) - self.assertEqual(result.column_names, ("a", "b", "c")) + self.assertEqual(result.column_names, ("a", "b", "c", "d")) assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) assert isinstance(result.column_types[2], agate.data_types.Text) + assert isinstance(result.column_types[3], agate_helper.Integer) + self.assertEqual(len(result), 4) + + result = agate_helper.merge_tables([t1, t3]) + self.assertEqual(result.column_names, ("a", "b", "c", "d")) + assert isinstance(result.column_types[0], agate_helper.Integer) + assert isinstance(result.column_types[1], agate.data_types.Text) + assert isinstance(result.column_types[2], agate.data_types.Number) + assert isinstance(result.column_types[3], agate.data_types.Number) self.assertEqual(len(result), 4) result = agate_helper.merge_tables([t2, t3]) - self.assertEqual(result.column_names, ("a", "b", "c")) + self.assertEqual(result.column_names, ("a", "b", "c", "d")) assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) assert isinstance(result.column_types[2], agate.data_types.Text) + assert isinstance(result.column_types[3], agate.data_types.Number) self.assertEqual(len(result), 4) result = agate_helper.merge_tables([t1, t2, t3]) - self.assertEqual(result.column_names, ("a", "b", "c")) + self.assertEqual(result.column_names, ("a", "b", "c", "d")) assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) assert isinstance(result.column_types[2], agate.data_types.Text) + assert isinstance(result.column_types[3], agate.data_types.Number) self.assertEqual(len(result), 6) def test_nocast_string_types(self): From 52e8ee0613aaeaf953b37b6980878173bff40349 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Wed, 6 Sep 2023 09:04:24 -0500 Subject: [PATCH 3/6] add tests for combining number with integer. --- tests/unit/test_agate_helper.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/unit/test_agate_helper.py b/tests/unit/test_agate_helper.py index a4aed5c3e14..991f6f2fd7e 100644 --- a/tests/unit/test_agate_helper.py +++ b/tests/unit/test_agate_helper.py @@ -165,6 +165,14 @@ def test_merge_mixed(self): assert isinstance(result.column_types[3], agate.data_types.Number) self.assertEqual(len(result), 4) + result = agate_helper.merge_tables([t3, t2]) + self.assertEqual(result.column_names, ("a", "b", "c", "d")) + assert isinstance(result.column_types[0], agate_helper.Integer) + assert isinstance(result.column_types[1], agate.data_types.Text) + assert isinstance(result.column_types[2], agate.data_types.Text) + assert isinstance(result.column_types[3], agate.data_types.Number) + self.assertEqual(len(result), 4) + result = agate_helper.merge_tables([t1, t2, t3]) self.assertEqual(result.column_names, ("a", "b", "c", "d")) assert isinstance(result.column_types[0], agate_helper.Integer) From 62a09104407789c44e6678b436dfe1751a501df3 Mon Sep 17 00:00:00 2001 From: Dave Connors Date: Wed, 6 Sep 2023 09:18:28 -0500 Subject: [PATCH 4/6] add unit test when nulls are added --- tests/functional/show/fixtures.py | 17 +++++++++++++++++ tests/functional/show/test_show.py | 15 +++++++++++++++ 2 files changed, 32 insertions(+) diff --git a/tests/functional/show/fixtures.py b/tests/functional/show/fixtures.py index 578c6b5f096..89b698abea8 100644 --- a/tests/functional/show/fixtures.py +++ b/tests/functional/show/fixtures.py @@ -10,6 +10,23 @@ 5 as int_field """ +models__sample_number_model_with_nulls = """ +select + cast(1.0 as int) as float_to_int_field, + 3.0 as float_field, + 4.3 as float_with_dec_field, + 5 as int_field + +union all + +select + cast(null as int) as float_to_int_field, + cast(null as float) as float_field, + cast(null as float) as float_with_dec_field, + cast(null as int) as int_field + +""" + models__second_model = """ select sample_num as col_one, diff --git a/tests/functional/show/test_show.py b/tests/functional/show/test_show.py index a8db41d26a0..35c0fcb40d4 100644 --- a/tests/functional/show/test_show.py +++ b/tests/functional/show/test_show.py @@ -7,6 +7,7 @@ seeds__sample_seed, models__sample_model, models__sample_number_model, + models__sample_number_model_with_nulls, models__second_model, models__ephemeral_model, schema_yml, @@ -21,6 +22,7 @@ def models(self): return { "sample_model.sql": models__sample_model, "sample_number_model.sql": models__sample_number_model, + "sample_number_model_with_nulls.sql": models__sample_number_model_with_nulls, "second_model.sql": models__second_model, "ephemeral_model.sql": models__ephemeral_model, "sql_header.sql": models__sql_header, @@ -77,6 +79,19 @@ def test_numeric_values(self, project): assert "5" in log_output assert "5.0" not in log_output + def test_numeric_values_with_nulls(self, project): + run_dbt(["build"]) + (results, log_output) = run_dbt_and_capture( + ["show", "--select", "sample_number_model_with_nulls", "--output", "json"] + ) + assert "Previewing node 'sample_number_model_with_nulls'" not in log_output + assert "1.0" not in log_output + assert "1" in log_output + assert "3.0" in log_output + assert "4.3" in log_output + assert "5" in log_output + assert "5.0" not in log_output + def test_inline_pass(self, project): run_dbt(["build"]) (results, log_output) = run_dbt_and_capture( From 377b07be0b221821217c3a0106eb4bbbf4b81b48 Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Wed, 6 Sep 2023 10:14:43 -0500 Subject: [PATCH 5/6] cant none as an Integer --- core/dbt/clients/agate_helper.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/core/dbt/clients/agate_helper.py b/core/dbt/clients/agate_helper.py index 80b01e99486..29db25fe570 100644 --- a/core/dbt/clients/agate_helper.py +++ b/core/dbt/clients/agate_helper.py @@ -14,7 +14,10 @@ class Integer(agate.data_types.DataType): def cast(self, d): - if type(d) == int: + # by default agate will cast none as a Number + # but we need to cast it as an Integer to preserve + # the type when merging and unioning tables + if type(d) == int or d is None: return d else: raise agate.exceptions.CastError('Can not parse value "%s" as Integer.' % d) @@ -177,7 +180,7 @@ def __setitem__(self, key, value): elif isinstance(value, _NullMarker): # use the existing value return - # when one table column is Number while another is Integer, let the column become Number on merge + # when one table column is Number while another is Integer, force the column to Number on merge elif isinstance(value, Integer) and isinstance(existing_type, agate.data_types.Number): # use the existing value return @@ -195,8 +198,9 @@ def finalize(self) -> Dict[str, agate.data_types.DataType]: result: Dict[str, agate.data_types.DataType] = {} for key, value in self.items(): if isinstance(value, _NullMarker): - # this is what agate would do. - result[key] = agate.data_types.Number() + # agate would make it a Number but we'll make it Integer so that if this column + # gets merged with another Integer column, it won't get forced to a Number + result[key] = Integer() else: result[key] = value return result From da21befa9d550b927772e42c17a03bd0f4406a8d Mon Sep 17 00:00:00 2001 From: Emily Rockman Date: Wed, 6 Sep 2023 10:19:40 -0500 Subject: [PATCH 6/6] fix null tests --- tests/unit/test_agate_helper.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/unit/test_agate_helper.py b/tests/unit/test_agate_helper.py index 991f6f2fd7e..476c9514280 100644 --- a/tests/unit/test_agate_helper.py +++ b/tests/unit/test_agate_helper.py @@ -127,7 +127,7 @@ def test_merge_allnull(self): self.assertEqual(result.column_names, ("a", "b", "c")) assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) - assert isinstance(result.column_types[2], agate.data_types.Number) + assert isinstance(result.column_types[2], agate_helper.Integer) self.assertEqual(len(result), 4) def test_merge_mixed(self): @@ -153,7 +153,7 @@ def test_merge_mixed(self): self.assertEqual(result.column_names, ("a", "b", "c", "d")) assert isinstance(result.column_types[0], agate_helper.Integer) assert isinstance(result.column_types[1], agate.data_types.Text) - assert isinstance(result.column_types[2], agate.data_types.Number) + assert isinstance(result.column_types[2], agate_helper.Integer) assert isinstance(result.column_types[3], agate.data_types.Number) self.assertEqual(len(result), 4)