Group a DataFrame
Description
This doesn’t modify the data but only stores information about the group
structure. This structure can then be used by several functions
($agg(), $filter(), etc.).
Usage
<DataFrame>$group_by(..., maintain_order = polars_options()$maintain_order)
Arguments
...
|
Column(s) to group by. Accepts expression input. Characters are parsed as column names. |
maintain_order
|
Ensure that the order of the groups is consistent with the input data.
This is slower than a default group by. Setting this to
TRUE prevents the query from running on the streaming engine.
The default value can be changed with
options(polars.maintain_order = TRUE) .
|
Details
Within each group, the order of the rows is always preserved, regardless
of the maintain_order argument.
Value
GroupBy (a DataFrame with special groupby methods like $agg())
See Also
- <DataFrame>$partition_by()
Examples
library("polars")
df = pl$DataFrame(
a = c("a", "b", "a", "b", "c"),
b = c(1, 2, 1, 3, 3),
c = c(5, 4, 3, 2, 1)
)
df$group_by("a")$agg(pl$col("b")$sum())
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ a ┆ b │
#> │ --- ┆ --- │
#> │ str ┆ f64 │
#> ╞═════╪═════╡
#> │ b ┆ 5.0 │
#> │ c ┆ 3.0 │
#> │ a ┆ 2.0 │
#> └─────┴─────┘
# Set `maintain_order = TRUE` to ensure the order of the groups is consistent with the input.
df$group_by("a", maintain_order = TRUE)$agg(pl$col("c"))
#> shape: (3, 2)
#> ┌─────┬────────────┐
#> │ a ┆ c │
#> │ --- ┆ --- │
#> │ str ┆ list[f64] │
#> ╞═════╪════════════╡
#> │ a ┆ [5.0, 3.0] │
#> │ b ┆ [4.0, 2.0] │
#> │ c ┆ [1.0] │
#> └─────┴────────────┘
# Group by multiple columns by passing a character vector of column names.
df$group_by(c("a", "b"))$agg(pl$max("c"))
#> shape: (4, 3)
#> ┌─────┬─────┬─────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ b ┆ 3.0 ┆ 2.0 │
#> │ a ┆ 1.0 ┆ 5.0 │
#> │ c ┆ 3.0 ┆ 1.0 │
#> │ b ┆ 2.0 ┆ 4.0 │
#> └─────┴─────┴─────┘
# Or pass multiple arguments to group by multiple columns in the same way.
# Expressions are also accepted.
df$group_by("a", pl$col("b") %/% 2)$agg(
pl$col("c")$mean()
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ a ┆ b ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ c ┆ 1.0 ┆ 1.0 │
#> │ a ┆ 0.0 ┆ 4.0 │
#> │ b ┆ 1.0 ┆ 3.0 │
#> └─────┴─────┴─────┘
# The columns will be renamed to the argument names.
df$group_by(d = "a", e = pl$col("b") %/% 2)$agg(
pl$col("c")$mean()
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ d ┆ e ┆ c │
#> │ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ c ┆ 1.0 ┆ 1.0 │
#> │ a ┆ 0.0 ┆ 4.0 │
#> │ b ┆ 1.0 ┆ 3.0 │
#> └─────┴─────┴─────┘