Skip to content

Create Struct DataType

Source code

Description

One can create a Struct data type with pl$Struct(). There are also multiple ways to create columns of data type Struct in a DataFrame or a Series; see the examples.

Usage

DataType_Struct(...)

Arguments

...: Either named inputs of the form field_name = datatype, or objects of class RPolarsField created by pl$Field().

Value

A Struct DataType containing a list of Fields.

Examples

library("polars")

# create a Struct-DataType
pl$Struct(foo = pl$Int32, pl$Field("bar", pl$Boolean))
#> DataType: Struct(
#>     [
#>         Field {
#>             name: "foo",
#>             dtype: Int32,
#>         },
#>         Field {
#>             name: "bar",
#>             dtype: Boolean,
#>         },
#>     ],
#> )
# check if an element is any kind of Struct()
test = pl$Struct(a = pl$UInt64)
pl$same_outer_dt(test, pl$Struct())
#> [1] TRUE
# `test` is a type of Struct, but it doesn't mean it is equal to an empty Struct
test == pl$Struct()
#> [1] FALSE
# The way to create a `Series` of type `Struct` is a bit convoluted as it involves
# `data.frame()`, `list()`, and `I()`:
as_polars_series(
  data.frame(a = 1:2, b = I(list(c("x", "y"), "z")))
)
#> polars Series: shape: (2,)
#> Series: '' [struct[2]]
#> [
#>  {1,["x", "y"]}
#>  {2,["z"]}
#> ]
# A slightly simpler way would be via `tibble::tibble()` or
# `data.table::data.table()`:
if (requireNamespace("tibble", quietly = TRUE)) {
  as_polars_series(
    tibble::tibble(a = 1:2, b = list(c("x", "y"), "z"))
  )
}
#> polars Series: shape: (2,)
#> Series: '' [struct[2]]
#> [
#>  {1,["x", "y"]}
#>  {2,["z"]}
#> ]
# Finally, one can use `pl$struct()` to convert existing columns or `Series`
# to a `Struct`:
x = pl$DataFrame(
  a = 1:2,
  b = list(c("x", "y"), "z")
)

out = x$select(pl$struct(c("a", "b")))
out
#> shape: (2, 1)
#> ┌────────────────┐
#> │ a              │
#> │ ---            │
#> │ struct[2]      │
#> ╞════════════════╡
#> │ {1,["x", "y"]} │
#> │ {2,["z"]}      │
#> └────────────────┘
out$schema
#> $a
#> DataType: Struct(
#>     [
#>         Field {
#>             name: "a",
#>             dtype: Int32,
#>         },
#>         Field {
#>             name: "b",
#>             dtype: List(
#>                 String,
#>             ),
#>         },
#>     ],
#> )