Group a LazyFrame
Description
This doesn’t modify the data but only stores information about the group
structure. This structure can then be used by several functions
($agg(), $filter(), etc.).
Usage
<LazyFrame>$group_by(..., maintain_order = polars_options()$maintain_order)
Arguments
...
|
Column(s) to group by. Accepts expression input. Characters are parsed as column names. |
maintain_order
|
Ensure that the order of the groups is consistent with the input data.
This is slower than a default group by. Setting this to
TRUE prevents the query from running on the streaming engine.
The default value can be changed with
options(polars.maintain_order = TRUE).
|
Value
LazyGroupBy (a LazyFrame with special groupby methods like $agg())
Examples
library("polars")
lf = pl$LazyFrame(
a = c("a", "b", "a", "b", "c"),
b = c(1, 2, 1, 3, 3),
c = c(5, 4, 3, 2, 1)
)
lf$group_by("a")$agg(pl$col("b")$sum())$collect()
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ a ┆ b │
#> │ --- ┆ --- │
#> │ str ┆ f64 │
#> ╞═════╪═════╡
#> │ a ┆ 2.0 │
#> │ b ┆ 5.0 │
#> │ c ┆ 3.0 │
#> └─────┴─────┘
# Set `maintain_order = TRUE` to ensure the order of the groups is consistent with the input.
lf$group_by("a", maintain_order = TRUE)$agg(pl$col("c"))$collect()
#> shape: (3, 2)
#> ┌─────┬────────────┐
#> │ a ┆ c │
#> │ --- ┆ --- │
#> │ str ┆ list[f64] │
#> ╞═════╪════════════╡
#> │ a ┆ [5.0, 3.0] │
#> │ b ┆ [4.0, 2.0] │
#> │ c ┆ [1.0] │
#> └─────┴────────────┘
# Group by multiple columns by passing a character vector of column names.
lf$group_by(c("a", "b"))$agg(pl$max("c"))$collect()
#> shape: (4, 3)
#> ┌─────┬─────┬─────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ c ┆ 3.0 ┆ 1.0 │
#> │ b ┆ 2.0 ┆ 4.0 │
#> │ b ┆ 3.0 ┆ 2.0 │
#> │ a ┆ 1.0 ┆ 5.0 │
#> └─────┴─────┴─────┘
# Or pass several arguments to group by multiple columns in the same way.
# Expressions are also accepted.
lf$group_by("a", pl$col("b") %/% 2)$agg(
pl$col("c")$mean()
)$collect()
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ a ┆ 0.0 ┆ 4.0 │
#> │ b ┆ 1.0 ┆ 3.0 │
#> │ c ┆ 1.0 ┆ 1.0 │
#> └─────┴─────┴─────┘
# The columns will be renamed to the argument names.
lf$group_by(d = "a", e = pl$col("b") %/% 2)$agg(
pl$col("c")$mean()
)$collect()
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ d ┆ e ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ b ┆ 1.0 ┆ 3.0 │
#> │ a ┆ 0.0 ┆ 4.0 │
#> │ c ┆ 1.0 ┆ 1.0 │
#> └─────┴─────┴─────┘