From cf52e98c9379e6c576abf5edde4bb0a25562c887 Mon Sep 17 00:00:00 2001
From: Kevin Gurney
Date: Wed, 30 Aug 2023 14:59:07 -0400
Subject: [PATCH] Add tests for indexing `RecordBatch` columns by name.

---
 matlab/test/arrow/tabular/tRecordBatch.m | 165 ++++++++++++++++++++++-
 1 file changed, 164 insertions(+), 1 deletion(-)

diff --git a/matlab/test/arrow/tabular/tRecordBatch.m b/matlab/test/arrow/tabular/tRecordBatch.m
index d9c3c98652b08..d418f00fcafe3 100644
--- a/matlab/test/arrow/tabular/tRecordBatch.m
+++ b/matlab/test/arrow/tabular/tRecordBatch.m
@@ -109,7 +109,7 @@ function UnsupportedColumnIndexType(tc)
             TOriginal = table(1, 2, 3);
             arrowRecordBatch = arrow.recordBatch(TOriginal);
             fcn = @() arrowRecordBatch.column(datetime(2022, 1, 3));
-            tc.verifyError(fcn, "arrow:badsubscript:NonNumeric");
+            tc.verifyError(fcn, "arrow:badsubscript:UnsupportedIndexType");
         end
 
         function ErrorIfIndexIsNonScalar(tc)
@@ -223,6 +223,169 @@ function SchemaNoSetter(tc)
                 "MATLAB:class:SetProhibited");
         end
 
+        function GetColumnByName(testCase)
+            % Verify that columns can be accessed using a field name.
+            recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+                arrow.array([1, 2, 3]), ...
+                arrow.array(["A", "B", "C"]), ...
+                arrow.array([true, false, true]), ...
+                ColumnNames=["A", "B", "C"] ...
+            );
+
+            expected = arrow.array([1, 2, 3]);
+            actual = recordBatch.column("A");
+            testCase.verifyEqual(actual, expected);
+
+            expected = arrow.array(["A", "B", "C"]);
+            actual = recordBatch.column("B");
+            testCase.verifyEqual(actual, expected);
+
+            expected = arrow.array([true, false, true]);
+            actual = recordBatch.column("C");
+            testCase.verifyEqual(actual, expected);
+        end
+
+        function GetColumnByNameWithEmptyString(testCase)
+            % Verify that a column whose Field name is the empty string ("")
+            % can be accessed using the column() method.
+            recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+                arrow.array([1, 2, 3]), ...
+                arrow.array(["A", "B", "C"]), ...
+                arrow.array([true, false, true]), ...
+                ColumnNames=["A", "", "C"] ...
+            );
+
+            expected = arrow.array(["A", "B", "C"]);
+            actual = recordBatch.column("");
+            testCase.verifyEqual(actual, expected);
+        end
+
+        function GetColumnByNameWithWhitespace(testCase)
+            % Verify that columns whose Field names contain only whitespace
+            % characters (of differing lengths) can be accessed using column().
+            recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+                arrow.array([1, 2, 3]), ...
+                arrow.array(["A", "B", "C"]), ...
+                arrow.array([true, false, true]), ...
+                ColumnNames=[" ", "  ", "   "] ...
+            );
+
+            expected = arrow.array([1, 2, 3]);
+            actual = recordBatch.column(" ");
+            testCase.verifyEqual(actual, expected);
+
+            expected = arrow.array(["A", "B", "C"]);
+            actual = recordBatch.column("  ");
+            testCase.verifyEqual(actual, expected);
+
+            expected = arrow.array([true, false, true]);
+            actual = recordBatch.column("   ");
+            testCase.verifyEqual(actual, expected);
+        end
+
+        function ErrorIfColumnNameDoesNotExist(testCase)
+            % Verify that an error is thrown when trying to access a column
+            % with a Field name that is not part of the Schema of the RecordBatch.
+            recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+                arrow.array([1, 2, 3]), ...
+                arrow.array(["A", "B", "C"]), ...
+                arrow.array([true, false, true]), ...
+                ColumnNames=["A", "B", "C"] ...
+            );
+
+            % Matching should be case sensitive.
+            name = "a";
+            testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+            name = "aA";
+            testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+            name = "D";
+            testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+            name = "";
+            testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+            name = " ";
+            testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+        end
+
+        function ErrorIfAmbiguousColumnName(testCase)
+            % Verify that an error is thrown when trying to access a column
+            % with a name that is ambiguous / occurs more than once in the
+            % Schema of the RecordBatch.
+            recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+                arrow.array([1, 2, 3]), ...
+                arrow.array(["A", "B", "C"]), ...
+                arrow.array([true, false, true]), ...
+                arrow.array([days(1), days(2), days(3)]), ...
+                ColumnNames=["A", "A", "B", "B"] ...
+            );
+
+            name = "A";
+            testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+
+            name = "B";
+            testCase.verifyError(@() recordBatch.column(name), "arrow:tabular:schema:AmbiguousFieldName");
+        end
+
+        function GetColumnByNameWithChar(testCase)
+            % Verify that the column method works when supplied a char
+            % vector as input.
+            recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+                arrow.array([1, 2, 3]), ...
+                arrow.array(["A", "B", "C"]), ...
+                arrow.array([true, false, true]), ...
+                ColumnNames=["", "B", "123"] ...
+            );
+
+            % Should match the first column whose name is the
+            % empty string ("").
+            name = char.empty(0, 0);
+            expected = arrow.array([1, 2, 3]);
+            actual = recordBatch.column(name);
+            testCase.verifyEqual(actual, expected);
+
+            name = char.empty(0, 1);
+            expected = arrow.array([1, 2, 3]);
+            actual = recordBatch.column(name);
+            testCase.verifyEqual(actual, expected);
+
+            name = char.empty(1, 0);
+            expected = arrow.array([1, 2, 3]);
+            actual = recordBatch.column(name);
+            testCase.verifyEqual(actual, expected);
+
+            % Should match the second column whose name is "B".
+            name = 'B';
+            expected = arrow.array(["A", "B", "C"]);
+            actual = recordBatch.column(name);
+            testCase.verifyEqual(actual, expected);
+
+            % Should match the third field whose name is "123".
+            name = '123';
+            expected = arrow.array([true, false, true]);
+            actual = recordBatch.column(name);
+            testCase.verifyEqual(actual, expected);
+        end
+
+        function ErrorIfColumnNameIsNonScalar(testCase)
+            % Verify that an error is thrown if a nonscalar string array is
+            % specified as a column name to the column method.
+            recordBatch = arrow.tabular.RecordBatch.fromArrays(...
+                arrow.array([1, 2, 3]), ...
+                arrow.array(["A", "B", "C"]), ...
+                arrow.array([true, false, true]), ...
+                ColumnNames=["A", "B", "C"] ...
+            );
+
+            name = ["A", "B", "C"];
+            testCase.verifyError(@() recordBatch.column(name), "MATLAB:expectedScalar");
+
+            name = ["A"; "B"; "C"];
+            testCase.verifyError(@() recordBatch.column(name), "MATLAB:expectedScalar");
+        end
+
     end
 
     methods