Skip to content

Collect columns into a struct column

Source code

Description

Collect columns into a struct column

Usage

pl$struct(exprs, schema = NULL)

Arguments

exprs Columns/Expressions to collect into a Struct.
schema Optional named list that explicitly defines the dtypes of the struct fields. Each name must match the name of a column wrapped in the struct. It can only be used to cast some or all dtypes, not to change the names. If NULL (default), the columns' datatypes are not modified. Columns that do not exist are silently ignored and not included in the final struct.

Details

pl$struct() creates an Expr of DataType Struct().

Compared to the Python implementation, pl$struct() doesn’t have the argument eager and always returns an Expr. Use $to_series() to return a Series.

Value

Expr with dtype Struct

Examples

library("polars")

# isolated expression to wrap all columns in a struct aliased 'my_struct'
pl$struct(pl$all())$alias("my_struct")
#> polars Expr: *.as_struct().alias("my_struct")
# wrap all columns into one column/Series
df = pl$DataFrame(
  int = 1:2,
  str = c("a", "b"),
  bool = c(TRUE, NA),
  list = list(1:2, 3L)
)$select(
  pl$struct(pl$all())$alias("my_struct")
)

print(df)
#> shape: (2, 1)
#> ┌─────────────────────┐
#> │ my_struct           │
#> │ ---                 │
#> │ struct[4]           │
#> ╞═════════════════════╡
#> │ {1,"a",true,[1, 2]} │
#> │ {2,"b",null,[3]}    │
#> └─────────────────────┘
print(df$schema) # returns a schema: a named list containing one element, a Struct named my_struct
#> $my_struct
#> DataType: Struct(
#>     [
#>         Field {
#>             name: "int",
#>             dtype: Int32,
#>         },
#>         Field {
#>             name: "str",
#>             dtype: String,
#>         },
#>         Field {
#>             name: "bool",
#>             dtype: Boolean,
#>         },
#>         Field {
#>             name: "list",
#>             dtype: List(
#>                 Int32,
#>             ),
#>         },
#>     ],
#> )
# wrap two columns in a struct and provide a schema to set all or some DataTypes by name
e1 = pl$struct(
  pl$col(c("int", "str")),
  schema = list(int = pl$Int64, str = pl$String)
)$alias("my_struct")
# same result as e.g. wrapping the columns in a struct and casting afterwards
e2 = pl$struct(
  list(pl$col("int"), pl$col("str"))
)$cast(
  pl$Struct(int = pl$Int64, str = pl$String)
)$alias("my_struct")

df = pl$DataFrame(
  int = 1:2,
  str = c("a", "b"),
  bool = c(TRUE, NA),
  list = list(1:2, 3L)
)

# verify equality in R
identical(df$select(e1)$to_list(), df$select(e2)$to_list())
#> [1] TRUE
df$select(e2)
#> shape: (2, 1)
#> ┌───────────┐
#> │ my_struct │
#> │ ---       │
#> │ struct[2] │
#> ╞═══════════╡
#> │ {1,"a"}   │
#> │ {2,"b"}   │
#> └───────────┘
df$select(e2)$to_data_frame()
#>   my_struct
#> 1      1, a
#> 2      2, b