Collect columns into a struct column
Description
Collect columns into a struct column
Usage
pl$struct(exprs, schema = NULL)
Arguments
exprs
|
Columns/Expressions to collect into a Struct. |
schema
|
Optional named list (the schema) that explicitly defines the dtypes of
the struct fields. Each name must match a column name wrapped in the
struct. Can only be used to cast some or all dtypes, not to change the
names. If NULL (default), the columns' datatypes are not modified.
Columns that do not exist are silently ignored and not included in the
final struct.
|
Details
pl$struct() creates an Expr of DataType Struct().
Compared to the Python implementation, pl$struct() doesn’t have the
argument eager and always returns an Expr. Use $to_series() to return
a Series.
Value
Expr with dtype Struct
Examples
library("polars")
# isolated expression to wrap all columns in a struct aliased 'my_struct'
pl$struct(pl$all())$alias("my_struct")
#> polars Expr: *.as_struct().alias("my_struct")
# wrap all columns into one column/Series
df = pl$DataFrame(
int = 1:2,
str = c("a", "b"),
bool = c(TRUE, NA),
list = list(1:2, 3L)
)$select(
pl$struct(pl$all())$alias("my_struct")
)
print(df)
#> shape: (2, 1)
#> ┌─────────────────────┐
#> │ my_struct │
#> │ --- │
#> │ struct[4] │
#> ╞═════════════════════╡
#> │ {1,"a",true,[1, 2]} │
#> │ {2,"b",null,[3]} │
#> └─────────────────────┘
#> $my_struct
#> DataType: Struct(
#> [
#> Field {
#> name: "int",
#> dtype: Int32,
#> },
#> Field {
#> name: "str",
#> dtype: String,
#> },
#> Field {
#> name: "bool",
#> dtype: Boolean,
#> },
#> Field {
#> name: "list",
#> dtype: List(
#> Int32,
#> ),
#> },
#> ],
#> )
# wrap two columns in a struct and provide a schema to set all or some DataTypes by name
e1 = pl$struct(
pl$col(c("int", "str")),
schema = list(int = pl$Int64, str = pl$String)
)$alias("my_struct")
# same result as e.g. wrapping the columns in a struct and casting afterwards
e2 = pl$struct(
list(pl$col("int"), pl$col("str"))
)$cast(
pl$Struct(int = pl$Int64, str = pl$String)
)$alias("my_struct")
df = pl$DataFrame(
int = 1:2,
str = c("a", "b"),
bool = c(TRUE, NA),
list = list(1:2, 3L)
)
# verify equality in R
identical(df$select(e1)$to_list(), df$select(e2)$to_list())
#> [1] TRUE
#> shape: (2, 1)
#> ┌───────────┐
#> │ my_struct │
#> │ --- │
#> │ struct[2] │
#> ╞═══════════╡
#> │ {1,"a"} │
#> │ {2,"b"} │
#> └───────────┘
#> my_struct
#> 1 1, a
#> 2 2, b