From 1eaa8d53302c1363808f889f5a5fbe550ec88304 Mon Sep 17 00:00:00 2001 From: Dewey Dunnington Date: Mon, 16 Dec 2024 21:14:34 -0600 Subject: [PATCH] feat(r): Support matrix objects as fixed-size-list arrays (#692) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Still needs some testing on the stream case, and is unfortunately not very zero copy; however, gets the job done (and I think fixes some cases where we would have otherwise silently handled a matrix as the storage type). Inspired by #691! ``` r library(nanoarrow) df <- data.frame(x = 1:10) df$matrix_col <- matrix(letters[1:20], ncol = 2, byrow = TRUE) array <- as_nanoarrow_array(df) # Default comes back as list_of(character()) convert_array(array) |> tibble::as_tibble() #> # A tibble: 10 × 2 #> x matrix_col #> > #> 1 1 [2] #> 2 2 [2] #> 3 3 [2] #> 4 4 [2] #> 5 5 [2] #> 6 6 [2] #> 7 7 [2] #> 8 8 [2] #> 9 9 [2] #> 10 10 [2] # But can specify matrix convert_array( array, tibble::tibble(x = integer(), matrix_col = matrix(character(), ncol = 2)) ) #> # A tibble: 10 × 2 #> x matrix_col[,1] [,2] #> #> 1 1 a b #> 2 2 c d #> 3 3 e f #> 4 4 g h #> 5 5 i j #> 6 6 k l #> 7 7 m n #> 8 8 o p #> 9 9 q r #> 10 10 s t ``` Created on 2024-12-12 with [reprex v2.1.1](https://reprex.tidyverse.org) --- r/NAMESPACE | 2 + r/R/as-array.R | 36 ++++++ r/R/convert-array.R | 2 + r/R/schema.R | 8 ++ r/man/convert_array.Rd | 2 + r/src/convert.c | 45 +++++--- r/src/convert.h | 4 +- r/src/convert_array_stream.c | 4 +- r/src/materialize.c | 88 ++++++++++++++- r/src/materialize_common.h | 1 + r/tests/testthat/test-as-array.R | 21 ++++ r/tests/testthat/test-convert-array-stream.R | 111 +++++++++++++++++++ r/tests/testthat/test-convert-array.R | 25 +++++ r/tests/testthat/test-schema.R | 4 + src/nanoarrow/device/device.c | 4 +- 15 files changed, 329 insertions(+), 28 deletions(-) diff --git a/r/NAMESPACE b/r/NAMESPACE index 76234961f..c76f380ec 100644 --- a/r/NAMESPACE +++ b/r/NAMESPACE @@ -36,6 +36,7 @@ S3method(as_nanoarrow_array,difftime) S3method(as_nanoarrow_array,factor) S3method(as_nanoarrow_array,integer64) S3method(as_nanoarrow_array,list) +S3method(as_nanoarrow_array,matrix) S3method(as_nanoarrow_array,nanoarrow_array) S3method(as_nanoarrow_array,nanoarrow_buffer) S3method(as_nanoarrow_array,vctrs_unspecified) @@ -100,6 +101,7 @@ S3method(infer_nanoarrow_schema,integer) S3method(infer_nanoarrow_schema,integer64) S3method(infer_nanoarrow_schema,list) S3method(infer_nanoarrow_schema,logical) +S3method(infer_nanoarrow_schema,matrix) S3method(infer_nanoarrow_schema,nanoarrow_array) S3method(infer_nanoarrow_schema,nanoarrow_array_stream) S3method(infer_nanoarrow_schema,nanoarrow_vctr) diff --git a/r/R/as-array.R b/r/R/as-array.R index cf72bf736..e4deff3eb 100644 --- a/r/R/as-array.R +++ b/r/R/as-array.R @@ -194,6 +194,42 @@ as_nanoarrow_array.blob <- function(x, ..., schema = NULL) { as_nanoarrow_array(unclass(x), schema = schema) } +#' @export +as_nanoarrow_array.matrix <- function(x, ..., schema = NULL) { + if (is.null(schema)) { + schema <- infer_nanoarrow_schema(x) + } else { + schema <- as_nanoarrow_schema(schema) + } + + expected_format <- paste0("+w:", ncol(x)) + if (expected_format != schema$format) { + stop( + sprintf( + "Expected schema for matrix with fixed-size list of %d elements but got %s", + ncol(x), + nanoarrow_schema_formatted(schema) + ) + ) + } + + # Raw unclass() doesn't work for matrix() + row_major_data <- t(x) + attributes(row_major_data) <- NULL + + child_array <- as_nanoarrow_array(row_major_data, schema = schema$children[[1]]) + array <- nanoarrow_array_init(schema) + nanoarrow_array_modify( + array, + list( + length = nrow(x), + null_count = 0, + buffers = list(NULL), + children = list(child_array) + ) + ) +} + #' @export as_nanoarrow_array.data.frame <- function(x, ..., schema = NULL) { # We need to override this to prevent the list implementation from handling it diff --git a/r/R/convert-array.R b/r/R/convert-array.R index a33273e70..5e3c15020 100644 --- a/r/R/convert-array.R +++ b/r/R/convert-array.R @@ -68,6 +68,8 @@ #' be converted to [blob::blob()]. #' - [vctrs::list_of()]: List, large list, and fixed-size list types can be #' converted to [vctrs::list_of()]. +#' - [matrix()]: Fixed-size list types can be converted to +#' `matrix(ptype, ncol = fixed_size)`. #' - [data.frame()]: Struct types can be converted to [data.frame()]. #' - [vctrs::unspecified()]: Any type can be converted to [vctrs::unspecified()]; #' however, a warning will be raised if any non-null values are encountered. diff --git a/r/R/schema.R b/r/R/schema.R index e8a4b8d5d..764181a44 100644 --- a/r/R/schema.R +++ b/r/R/schema.R @@ -153,6 +153,14 @@ infer_nanoarrow_schema.vctrs_unspecified <- function(x, ...) { na_na() } +#' @export +infer_nanoarrow_schema.matrix <- function(x, ...) { + na_fixed_size_list( + infer_nanoarrow_schema(unclass(x[integer(0)])), + list_size = ncol(x) + ) +} + #' @export infer_nanoarrow_schema.vctrs_list_of <- function(x, ...) { child_type <- infer_nanoarrow_schema(attr(x, "ptype")) diff --git a/r/man/convert_array.Rd b/r/man/convert_array.Rd index 49e1d49f2..79377e94d 100644 --- a/r/man/convert_array.Rd +++ b/r/man/convert_array.Rd @@ -60,6 +60,8 @@ attribute of \code{to}. be converted to \code{\link[blob:blob]{blob::blob()}}. \item \code{\link[vctrs:list_of]{vctrs::list_of()}}: List, large list, and fixed-size list types can be converted to \code{\link[vctrs:list_of]{vctrs::list_of()}}. +\item \code{\link[=matrix]{matrix()}}: Fixed-size list types can be converted to +\code{matrix(ptype, ncol = fixed_size)}. \item \code{\link[=data.frame]{data.frame()}}: Struct types can be converted to \code{\link[=data.frame]{data.frame()}}. \item \code{\link[vctrs:unspecified]{vctrs::unspecified()}}: Any type can be converted to \code{\link[vctrs:unspecified]{vctrs::unspecified()}}; however, a warning will be raised if any non-null values are encountered. diff --git a/r/src/convert.c b/r/src/convert.c index e232d67d6..aaeaab442 100644 --- a/r/src/convert.c +++ b/r/src/convert.c @@ -29,6 +29,8 @@ static R_xlen_t nanoarrow_vec_size(SEXP vec_sexp, struct PTypeView* ptype_view) { if (ptype_view->vector_type == VECTOR_TYPE_DATA_FRAME) { return nanoarrow_data_frame_size(vec_sexp); + } else if (Rf_isMatrix(vec_sexp)) { + return Rf_nrows(vec_sexp); } else { return Rf_xlength(vec_sexp); } @@ -149,12 +151,7 @@ static void set_converter_data_frame(SEXP converter_xptr, struct RConverter* con } static void set_converter_list_of(SEXP converter_xptr, struct RConverter* converter, - SEXP ptype) { - SEXP child_ptype = Rf_getAttrib(ptype, Rf_install("ptype")); - if (child_ptype == R_NilValue) { - Rf_error("Expected attribute 'ptype' for conversion to list_of"); - } - + SEXP ptype, SEXP child_ptype) { converter->children = (struct RConverter**)ArrowMalloc(1 * sizeof(struct RConverter*)); if (converter->children == NULL) { Rf_error("Failed to allocate converter children array"); @@ -230,7 +227,12 @@ SEXP nanoarrow_converter_from_ptype(SEXP ptype) { SEXP converter_shelter = R_ExternalPtrProtected(converter_xptr); struct RConverter* converter = (struct RConverter*)R_ExternalPtrAddr(converter_xptr); - if (Rf_isObject(ptype)) { + if (Rf_isMatrix(ptype)) { + converter->ptype_view.vector_type = VECTOR_TYPE_MATRIX; + SEXP child_ptype = PROTECT(Rf_allocVector(TYPEOF(ptype), 0)); + set_converter_list_of(converter_xptr, converter, ptype, child_ptype); + UNPROTECT(1); + } else if (Rf_isObject(ptype)) { if (nanoarrow_ptype_is_data_frame(ptype)) { converter->ptype_view.vector_type = VECTOR_TYPE_DATA_FRAME; set_converter_data_frame(converter_xptr, converter, ptype); @@ -238,7 +240,12 @@ SEXP nanoarrow_converter_from_ptype(SEXP ptype) { converter->ptype_view.vector_type = VECTOR_TYPE_BLOB; } else if (Rf_inherits(ptype, "vctrs_list_of")) { converter->ptype_view.vector_type = VECTOR_TYPE_LIST_OF; - set_converter_list_of(converter_xptr, converter, ptype); + SEXP child_ptype = Rf_getAttrib(ptype, Rf_install("ptype")); + if (child_ptype == R_NilValue) { + Rf_error("Expected attribute 'ptype' for conversion to list_of"); + } + + set_converter_list_of(converter_xptr, converter, ptype, child_ptype); } else if (Rf_inherits(ptype, "vctrs_unspecified")) { converter->ptype_view.vector_type = VECTOR_TYPE_UNSPECIFIED; } else if (Rf_inherits(ptype, "Date")) { @@ -300,7 +307,8 @@ int nanoarrow_converter_set_schema(SEXP converter_xptr, SEXP schema_xptr) { ArrowArrayViewInitFromSchema(&converter->array_view, schema, &converter->error)); if (converter->ptype_view.vector_type == VECTOR_TYPE_LIST_OF || - converter->ptype_view.vector_type == VECTOR_TYPE_DATA_FRAME) { + converter->ptype_view.vector_type == VECTOR_TYPE_DATA_FRAME || + converter->ptype_view.vector_type == VECTOR_TYPE_MATRIX) { set_converter_children_schema(converter_xptr, schema_xptr); } @@ -318,7 +326,8 @@ int nanoarrow_converter_set_array(SEXP converter_xptr, SEXP array_xptr) { converter->src.length = 0; if (converter->ptype_view.vector_type == VECTOR_TYPE_LIST_OF || - converter->ptype_view.vector_type == VECTOR_TYPE_DATA_FRAME) { + converter->ptype_view.vector_type == VECTOR_TYPE_DATA_FRAME || + converter->ptype_view.vector_type == VECTOR_TYPE_MATRIX) { set_converter_children_array(converter_xptr, array_xptr); } @@ -343,17 +352,23 @@ void sync_after_converter_reallocate(SEXP converter_xptr, struct RConverter* con converter->children[i], VECTOR_ELT(result_sexp, i), capacity); } + } else if (converter->ptype_view.vector_type == VECTOR_TYPE_MATRIX) { + // Reserve for the child converter here, which ensures that a matrix column in + // a data.frame() will get allocated properly. + SEXP child_converters = VECTOR_ELT(converter_shelter, 3); + SEXP item_converter_xptr = VECTOR_ELT(child_converters, 0); + nanoarrow_converter_reserve(item_converter_xptr, + capacity * Rf_ncols(converter->ptype_view.ptype)); } } -int nanoarrow_converter_reserve(SEXP converter_xptr, R_xlen_t additional_size) { +void nanoarrow_converter_reserve(SEXP converter_xptr, R_xlen_t additional_size) { struct RConverter* converter = (struct RConverter*)R_ExternalPtrAddr(converter_xptr); SEXP converter_shelter = R_ExternalPtrProtected(converter_xptr); SEXP current_result = VECTOR_ELT(converter_shelter, 4); if (current_result != R_NilValue) { - ArrowErrorSet(&converter->error, "Reallocation in converter is not implemented"); - return ENOTSUP; + Rf_error("Reallocation in converter is not implemented"); } SEXP result_sexp; @@ -368,7 +383,6 @@ int nanoarrow_converter_reserve(SEXP converter_xptr, R_xlen_t additional_size) { sync_after_converter_reallocate(converter_xptr, converter, result_sexp, additional_size); UNPROTECT(1); - return NANOARROW_OK; } R_xlen_t nanoarrow_converter_materialize_n(SEXP converter_xptr, R_xlen_t n) { @@ -401,7 +415,7 @@ R_xlen_t nanoarrow_converter_materialize_n(SEXP converter_xptr, R_xlen_t n) { int nanoarrow_converter_materialize_all(SEXP converter_xptr) { struct RConverter* converter = (struct RConverter*)R_ExternalPtrAddr(converter_xptr); R_xlen_t remaining = converter->array_view.array->length; - NANOARROW_RETURN_NOT_OK(nanoarrow_converter_reserve(converter_xptr, remaining)); + nanoarrow_converter_reserve(converter_xptr, remaining); if (nanoarrow_converter_materialize_n(converter_xptr, remaining) != remaining) { return ERANGE; } else { @@ -415,6 +429,7 @@ int nanoarrow_converter_finalize(SEXP converter_xptr) { SEXP current_result = VECTOR_ELT(converter_shelter, 4); NANOARROW_RETURN_NOT_OK(nanoarrow_materialize_finalize_result(converter_xptr)); + current_result = VECTOR_ELT(converter_shelter, 4); // Check result size. A future implementation could also shrink the length // or reallocate a shorter vector. diff --git a/r/src/convert.h b/r/src/convert.h index 3641295d1..380579abb 100644 --- a/r/src/convert.h +++ b/r/src/convert.h @@ -41,8 +41,8 @@ int nanoarrow_converter_set_array(SEXP converter_xptr, SEXP array_xptr); // Reserve space in the R vector output for additional elements. In theory // this could be used to provide growable behaviour; however, this is not -// implemented. Returns an errno code. -int nanoarrow_converter_reserve(SEXP converter_xptr, R_xlen_t additional_size); +// implemented. +void nanoarrow_converter_reserve(SEXP converter_xptr, R_xlen_t additional_size); // Materialize the next n elements into the output. Returns the number of elements // that were actually materialized which may be less than n. diff --git a/r/src/convert_array_stream.c b/r/src/convert_array_stream.c index 4c5a16f52..29e8f1509 100644 --- a/r/src/convert_array_stream.c +++ b/r/src/convert_array_stream.c @@ -94,9 +94,7 @@ SEXP nanoarrow_c_convert_array_stream(SEXP array_stream_xptr, SEXP ptype_sexp, nanoarrow_converter_stop(converter_xptr); } - if (nanoarrow_converter_reserve(converter_xptr, size) != NANOARROW_OK) { - nanoarrow_converter_stop(converter_xptr); - } + nanoarrow_converter_reserve(converter_xptr, size); int64_t n_batches = 0; do { diff --git a/r/src/materialize.c b/r/src/materialize.c index 45d6950de..77db5cbf7 100644 --- a/r/src/materialize.c +++ b/r/src/materialize.c @@ -116,7 +116,12 @@ int nanoarrow_ptype_is_nanoarrow_vctr(SEXP ptype) { SEXP nanoarrow_materialize_realloc(SEXP ptype, R_xlen_t len) { SEXP result; - if (Rf_isObject(ptype)) { + if (Rf_isMatrix(ptype)) { + // The actual value is built in the child converter but we can't have + // a NULL here because that confuses the internals into thinking that + // the allocate was never called. + result = PROTECT(Rf_allocVector(TYPEOF(ptype), 0)); + } else if (Rf_isObject(ptype)) { // There may be a more accurate test that more precisely captures the case // where a user has specified a valid ptype that doesn't work in a preallocate // + fill conversion. @@ -301,11 +306,12 @@ static void copy_vec_into(SEXP x, SEXP dst, R_xlen_t offset, R_xlen_t len) { int nanoarrow_materialize_finalize_result(SEXP converter_xptr) { SEXP converter_shelter = R_ExternalPtrProtected(converter_xptr); + struct RConverter* converter = (struct RConverter*)R_ExternalPtrAddr(converter_xptr); SEXP result = VECTOR_ELT(converter_shelter, 4); // Materialize never called (e.g., empty stream) if (result == R_NilValue) { - NANOARROW_RETURN_NOT_OK(nanoarrow_converter_reserve(converter_xptr, 0)); + nanoarrow_converter_reserve(converter_xptr, 0); result = VECTOR_ELT(converter_shelter, 4); } @@ -357,6 +363,22 @@ int nanoarrow_materialize_finalize_result(SEXP converter_xptr) { SET_VECTOR_ELT(result, i, child_result); UNPROTECT(1); } + } else if (converter->ptype_view.vector_type == VECTOR_TYPE_MATRIX) { + SEXP child_converter_xptrs = VECTOR_ELT(converter_shelter, 3); + SEXP item_converter_xptr = VECTOR_ELT(child_converter_xptrs, 0); + NANOARROW_RETURN_NOT_OK(nanoarrow_materialize_finalize_result(item_converter_xptr)); + SEXP item_result = PROTECT(nanoarrow_converter_release_result(item_converter_xptr)); + + SEXP matrix_symbol = PROTECT(Rf_install("matrix")); + SEXP nrow_sexp = PROTECT( + Rf_ScalarInteger(Rf_xlength(item_result) / converter->schema_view.fixed_size)); + SEXP ncol_sexp = PROTECT(Rf_ScalarInteger(converter->schema_view.fixed_size)); + SEXP byrow_sexp = PROTECT(Rf_ScalarLogical(TRUE)); + SEXP matrix_call = + PROTECT(Rf_lang5(matrix_symbol, item_result, nrow_sexp, ncol_sexp, byrow_sexp)); + SEXP final_result = PROTECT(Rf_eval(matrix_call, R_BaseNamespace)); + SET_VECTOR_ELT(converter_shelter, 4, final_result); + UNPROTECT(7); } return NANOARROW_OK; @@ -496,9 +518,7 @@ static int nanoarrow_materialize_data_frame(struct RConverter* converter, static int materialize_list_element(struct RConverter* converter, SEXP converter_xptr, int64_t offset, int64_t length) { - if (nanoarrow_converter_reserve(converter_xptr, length) != NANOARROW_OK) { - nanoarrow_converter_stop(converter_xptr); - } + nanoarrow_converter_reserve(converter_xptr, length); converter->src.offset = offset; converter->src.length = length; @@ -581,6 +601,62 @@ static int nanoarrow_materialize_list_of(struct RConverter* converter, return NANOARROW_OK; } +static int nanoarrow_materialize_matrix(struct RConverter* converter, + SEXP converter_xptr) { + SEXP converter_shelter = R_ExternalPtrProtected(converter_xptr); + SEXP child_converter_xptrs = VECTOR_ELT(converter_shelter, 3); + struct RConverter* child_converter = converter->children[0]; + SEXP child_converter_xptr = VECTOR_ELT(child_converter_xptrs, 0); + + struct ArrayViewSlice* src = &converter->src; + + // Make sure we error for dictionary types + if (src->array_view->array->dictionary != NULL) { + return EINVAL; + } + + switch (src->array_view->storage_type) { + case NANOARROW_TYPE_FIXED_SIZE_LIST: + break; + default: + return EINVAL; + } + + int64_t raw_src_offset = src->array_view->offset + src->offset; + int64_t list_length = src->array_view->layout.child_size_elements; + int64_t child_length = list_length * src->length; + + if (list_length != Rf_ncols(converter->ptype_view.ptype)) { + Rf_error("Can't convert fixed_size_list(list_size=%d) to matrix with %d cols", + (int)list_length, Rf_ncols(converter->ptype_view.ptype)); + } + + // First, we update the child array offset to account for the parent offset and + // materialize the child array. + child_converter->src.offset += raw_src_offset * list_length; + child_converter->src.length = child_length; + if (nanoarrow_converter_materialize_n(child_converter_xptr, child_length) != + child_length) { + return EINVAL; + } + + // If we have parent nulls, we have to project them into the destination + if (src->array_view->null_count != 0 && + src->array_view->buffer_views[0].data.data != NULL) { + // Here, dst.offset has already been incremented such that it's at the end + // of the chunk, but we need the original one for fill_vec_with_nulls(). + int64_t original_dst_offset = child_converter->dst.offset - child_length; + for (int64_t i = 0; i < src->length; i++) { + if (ArrowArrayViewIsNull(src->array_view, src->offset + i)) { + fill_vec_with_nulls(child_converter->dst.vec_sexp, + original_dst_offset + (i * list_length), list_length); + } + } + } + + return NANOARROW_OK; +} + static int nanoarrow_materialize_base(struct RConverter* converter, SEXP converter_xptr) { struct ArrayViewSlice* src = &converter->src; struct VectorSlice* dst = &converter->dst; @@ -614,6 +690,8 @@ static int nanoarrow_materialize_base(struct RConverter* converter, SEXP convert return nanoarrow_materialize_blob(src, dst, options); case VECTOR_TYPE_LIST_OF: return nanoarrow_materialize_list_of(converter, converter_xptr); + case VECTOR_TYPE_MATRIX: + return nanoarrow_materialize_matrix(converter, converter_xptr); case VECTOR_TYPE_DATA_FRAME: return nanoarrow_materialize_data_frame(converter, converter_xptr); default: diff --git a/r/src/materialize_common.h b/r/src/materialize_common.h index 6c811b615..edd76556d 100644 --- a/r/src/materialize_common.h +++ b/r/src/materialize_common.h @@ -44,6 +44,7 @@ enum VectorType { VECTOR_TYPE_BLOB, VECTOR_TYPE_LIST_OF, VECTOR_TYPE_DATA_FRAME, + VECTOR_TYPE_MATRIX, VECTOR_TYPE_OTHER }; diff --git a/r/tests/testthat/test-as-array.R b/r/tests/testthat/test-as-array.R index c9f6d0eb1..28950ba0f 100644 --- a/r/tests/testthat/test-as-array.R +++ b/r/tests/testthat/test-as-array.R @@ -584,6 +584,27 @@ test_that("as_nanoarrow_array() works for blob::blob() -> na_binary()", { ) }) +test_that("as_nanoarrow_array() works for matrix -> na_fixed_size_list()", { + mat <- matrix(1:6, ncol = 2, byrow = TRUE) + + # Check without explicit schema + array <- as_nanoarrow_array(mat) + expect_identical(infer_nanoarrow_schema(array)$format, "+w:2") + expect_identical(infer_nanoarrow_schema(array)$children[[1]]$format, "i") + expect_identical(array$buffers[[1]]$size_bytes, 0) + expect_identical(convert_buffer(array$children[[1]]$buffers[[2]]), 1:6) + + # Check with explicit schema + array <- as_nanoarrow_array( + mat, + schema = na_fixed_size_list(na_double(), list_size = 2) + ) + expect_identical(infer_nanoarrow_schema(array)$format, "+w:2") + expect_identical(infer_nanoarrow_schema(array)$children[[1]]$format, "g") + expect_identical(array$buffers[[1]]$size_bytes, 0) + expect_identical(convert_buffer(array$children[[1]]$buffers[[2]]), as.double(1:6)) +}) + test_that("as_nanoarrow_array() works for blob::blob() -> na_fixed_size_binary()", { # Without nulls array <- as_nanoarrow_array(blob::as_blob(letters), schema = na_fixed_size_binary(1)) diff --git a/r/tests/testthat/test-convert-array-stream.R b/r/tests/testthat/test-convert-array-stream.R index 534b91d27..0f510210c 100644 --- a/r/tests/testthat/test-convert-array-stream.R +++ b/r/tests/testthat/test-convert-array-stream.R @@ -137,6 +137,117 @@ test_that("convert array stream works for struct-style vectors", { ) }) +test_that("convert array stream works for fixed_size_list_of() -> matrix()", { + mat <- matrix(1:6, ncol = 2, byrow = TRUE) + array <- as_nanoarrow_array(mat) + stream <- basic_array_stream(list(array, array)) + expect_identical( + convert_array_stream(stream, matrix(integer(), ncol = 2)), + rbind(mat, mat) + ) + + # Check with non-default ptype + stream <- basic_array_stream(list(array, array)) + expect_identical( + convert_array_stream(stream, matrix(double(), ncol = 2)), + rbind( + matrix(as.double(1:6), ncol = 2, byrow = TRUE), + matrix(as.double(1:6), ncol = 2, byrow = TRUE) + ) + ) +}) + +test_that("convert array stream works for empty fixed_size_list_of() -> matrix()", { + stream <- basic_array_stream(list(), schema = na_fixed_size_list(na_int32(), 2)) + expect_identical( + convert_array_stream(stream, matrix(integer(), ncol = 2)), + matrix(integer(), ncol = 2) + ) +}) + +test_that("convert array stream works for nested fixed_size_list_of() -> matrix()", { + df <- data.frame(x = 1:3) + df$mat <- matrix(1:6, ncol = 2, byrow = TRUE) + + expected <- df[c(1:3, 1:3),] + row.names(expected) <- 1:6 + + array <- as_nanoarrow_array(df) + stream <- basic_array_stream(list(array, array)) + expect_identical( + convert_array_stream(stream, df[integer(0),]), + expected + ) +}) + +test_that("convert array stream works for fixed_size_list_of() with non-zero offsets -> matrix() ", { + mat <- matrix(1:6, ncol = 2, byrow = TRUE) + + # Non-zero parent offset + array <- as_nanoarrow_array(mat) + array <- nanoarrow_array_modify(array, list(offset = 1, length = 2)) + stream <- basic_array_stream(list(array, array)) + expect_identical( + convert_array_stream(stream, matrix(integer(), ncol = 2)), + rbind(mat[2:3, ], mat[2:3, ]) + ) + + + # Non-zero child offset + array <- as_nanoarrow_array(mat) + array <- nanoarrow_array_modify( + array, + list( + length = 2, + children = list( + nanoarrow_array_modify(array$children[[1]], list(offset = 2, length = 4)) + ) + ) + ) + stream <- basic_array_stream(list(array, array)) + expect_identical( + convert_array_stream(stream, matrix(integer(), ncol = 2)), + rbind(mat[2:3, ], mat[2:3, ]) + ) + + # Non-zero child offset and non-zero parent offset + array <- nanoarrow_array_modify( + array, + list( + offset = 1, + length = 1, + children = list( + nanoarrow_array_modify(array$children[[1]], list(offset = 2, length = 4)) + ) + ) + ) + stream <- basic_array_stream(list(array, array)) + expect_identical( + convert_array_stream(stream, matrix(integer(), ncol = 2)), + rbind(mat[3, ], mat[3, ]) + ) +}) + +test_that("convert array stream works for fixed_size_list_of() with parent nulls -> matrix()", { + mat <- matrix(1:6, ncol = 2, byrow = TRUE) + array <- as_nanoarrow_array(mat) + array <- nanoarrow_array_modify( + array, + list( + null_count = 1, + buffers = list( + as_nanoarrow_array(c(TRUE, TRUE, FALSE))$buffers[[2]] + ) + ) + ) + + stream <- basic_array_stream(list(array, array)) + expect_identical( + convert_array_stream(stream, matrix(integer(), ncol = 2)), + rbind(mat[c(1, 2, NA), ], mat[c(1, 2, NA), ]) + ) +}) + test_that("convert array stream respects the value of n", { batches <- list( data.frame(x = 1:5), diff --git a/r/tests/testthat/test-convert-array.R b/r/tests/testthat/test-convert-array.R index 42cea423b..e6a18f850 100644 --- a/r/tests/testthat/test-convert-array.R +++ b/r/tests/testthat/test-convert-array.R @@ -1041,6 +1041,31 @@ test_that("convert to vector works for null -> vctrs::list_of()", { ) }) +test_that("convert to vector works for fixed_size_list_of() -> matrix()", { + mat <- matrix(1:6, ncol = 2, byrow = TRUE) + array <- as_nanoarrow_array(mat) + + expect_identical( + convert_array(array, matrix(double(), ncol = 2)), + matrix(as.double(1:6), ncol = 2, byrow = TRUE) + ) +}) + +test_that("convert to vector errors for invalid matrix()", { + expect_error( + convert_array(as_nanoarrow_array(1:6), matrix()), + "Can't convert array to R vector of type matrix" + ) + + mat <- matrix(1:6, ncol = 2, byrow = TRUE) + array <- as_nanoarrow_array(mat) + expect_error( + convert_array(array, matrix(integer(), ncol = 3)), + "Can't convert fixed_size_list(list_size=2) to matrix with 3 cols", + fixed = TRUE + ) +}) + test_that("convert to vector works for Date", { array_date <- as_nanoarrow_array(as.Date(c(NA, "2000-01-01"))) expect_identical( diff --git a/r/tests/testthat/test-schema.R b/r/tests/testthat/test-schema.R index 364a36eb7..d7a1d7f3f 100644 --- a/r/tests/testthat/test-schema.R +++ b/r/tests/testthat/test-schema.R @@ -85,6 +85,10 @@ test_that("infer_nanoarrow_schema() methods work for hms type", { expect_identical(infer_nanoarrow_schema(hms::hms())$format, "ttm") }) +test_that("infer_nanoarrow_schema() method works for matrix type", { + expect_identical(infer_nanoarrow_schema(matrix(ncol = 3))$format, "+w:3") +}) + test_that("infer_nanoarrow_schema() methods work for vctrs types", { skip_if_not_installed("vctrs") diff --git a/src/nanoarrow/device/device.c b/src/nanoarrow/device/device.c index 8178b9526..a7aea2a27 100644 --- a/src/nanoarrow/device/device.c +++ b/src/nanoarrow/device/device.c @@ -626,9 +626,7 @@ struct ArrowDevice* ArrowDeviceCuda(ArrowDeviceType device_type, int64_t device_ #endif #if !defined(NANOARROW_DEVICE_WITH_METAL) -struct ArrowDevice* ArrowDeviceMetalDefaultDevice(void) { - return NULL; -} +struct ArrowDevice* ArrowDeviceMetalDefaultDevice(void) { return NULL; } ArrowErrorCode ArrowDeviceMetalInitDefaultDevice(struct ArrowDevice* device, struct ArrowError* error) {