Compute expressions over the given groups
Description
This expression is similar to performing a group by aggregation and joining the result back into the original DataFrame. The outcome is similar to how window functions work in PostgreSQL.
Usage
<Expr>$over(..., order_by = NULL, mapping_strategy = "group_to_rows")
Arguments
…
|
Column(s) to group by. Accepts expression input. Characters are parsed as column names. |
order_by
|
Order the window functions/aggregations with the partitioned groups by
the result of the expression passed to order_by . Can be an
Expr. Strings are parsed as column names.
|
mapping_strategy
|
One of the following:
|
Value
Expr
Examples
library("polars")
# Pass the name of a column to compute the expression over that column.
df = pl$DataFrame(
a = c("a", "a", "b", "b", "b"),
b = c(1, 2, 3, 5, 3),
c = c(5, 4, 2, 1, 3)
)
df$with_columns(
pl$col("c")$max()$over("a")$name$suffix("_max")
)
#> shape: (5, 4)
#> ┌─────┬─────┬─────┬───────┐
#> │ a ┆ b ┆ c ┆ c_max │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪═══════╡
#> │ a ┆ 1.0 ┆ 5.0 ┆ 5.0 │
#> │ a ┆ 2.0 ┆ 4.0 ┆ 5.0 │
#> │ b ┆ 3.0 ┆ 2.0 ┆ 3.0 │
#> │ b ┆ 5.0 ┆ 1.0 ┆ 3.0 │
#> │ b ┆ 3.0 ┆ 3.0 ┆ 3.0 │
#> └─────┴─────┴─────┴───────┘
# Expression input is supported.
df$with_columns(
pl$col("c")$max()$over(pl$col("b") %/% 2)$name$suffix("_max")
)
#> shape: (5, 4)
#> ┌─────┬─────┬─────┬───────┐
#> │ a ┆ b ┆ c ┆ c_max │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪═══════╡
#> │ a ┆ 1.0 ┆ 5.0 ┆ 5.0 │
#> │ a ┆ 2.0 ┆ 4.0 ┆ 4.0 │
#> │ b ┆ 3.0 ┆ 2.0 ┆ 4.0 │
#> │ b ┆ 5.0 ┆ 1.0 ┆ 1.0 │
#> │ b ┆ 3.0 ┆ 3.0 ┆ 4.0 │
#> └─────┴─────┴─────┴───────┘
# Group by multiple columns by passing a character vector of column names
# or list of expressions.
df$with_columns(
pl$col("c")$min()$over(c("a", "b"))$name$suffix("_min")
)
#> shape: (5, 4)
#> ┌─────┬─────┬─────┬───────┐
#> │ a ┆ b ┆ c ┆ c_min │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪═══════╡
#> │ a ┆ 1.0 ┆ 5.0 ┆ 5.0 │
#> │ a ┆ 2.0 ┆ 4.0 ┆ 4.0 │
#> │ b ┆ 3.0 ┆ 2.0 ┆ 2.0 │
#> │ b ┆ 5.0 ┆ 1.0 ┆ 1.0 │
#> │ b ┆ 3.0 ┆ 3.0 ┆ 2.0 │
#> └─────┴─────┴─────┴───────┘
#> shape: (5, 4)
#> ┌─────┬─────┬─────┬───────┐
#> │ a ┆ b ┆ c ┆ c_min │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪═══════╡
#> │ a ┆ 1.0 ┆ 5.0 ┆ 5.0 │
#> │ a ┆ 2.0 ┆ 4.0 ┆ 4.0 │
#> │ b ┆ 3.0 ┆ 2.0 ┆ 2.0 │
#> │ b ┆ 5.0 ┆ 1.0 ┆ 1.0 │
#> │ b ┆ 3.0 ┆ 3.0 ┆ 2.0 │
#> └─────┴─────┴─────┴───────┘
# Or use positional arguments to group by multiple columns in the same way.
df$with_columns(
pl$col("c")$min()$over("a", pl$col("b") %% 2)$name$suffix("_min")
)
#> shape: (5, 4)
#> ┌─────┬─────┬─────┬───────┐
#> │ a ┆ b ┆ c ┆ c_min │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╪═══════╡
#> │ a ┆ 1.0 ┆ 5.0 ┆ 5.0 │
#> │ a ┆ 2.0 ┆ 4.0 ┆ 4.0 │
#> │ b ┆ 3.0 ┆ 2.0 ┆ 1.0 │
#> │ b ┆ 5.0 ┆ 1.0 ┆ 1.0 │
#> │ b ┆ 3.0 ┆ 3.0 ┆ 1.0 │
#> └─────┴─────┴─────┴───────┘
# Alternative mapping strategy: join values in a list output
df$with_columns(
top_2 = pl$col("c")$top_k(2)$over("a", mapping_strategy = "join")
)
#> shape: (5, 4)
#> ┌─────┬─────┬─────┬────────────┐
#> │ a ┆ b ┆ c ┆ top_2 │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ str ┆ f64 ┆ f64 ┆ list[f64] │
#> ╞═════╪═════╪═════╪════════════╡
#> │ a ┆ 1.0 ┆ 5.0 ┆ [5.0, 4.0] │
#> │ a ┆ 2.0 ┆ 4.0 ┆ [5.0, 4.0] │
#> │ b ┆ 3.0 ┆ 2.0 ┆ [2.0, 3.0] │
#> │ b ┆ 5.0 ┆ 1.0 ┆ [2.0, 3.0] │
#> │ b ┆ 3.0 ┆ 3.0 ┆ [2.0, 3.0] │
#> └─────┴─────┴─────┴────────────┘
# order_by specifies how values are sorted within a group, which is
# essential when the operation depends on the order of values
df = pl$DataFrame(
g = c(1, 1, 1, 1, 2, 2, 2, 2),
t = c(1, 2, 3, 4, 4, 1, 2, 3),
x = c(10, 20, 30, 40, 10, 20, 30, 40)
)
# without order_by, the first and second values in the second group would
# be inverted, which would be wrong
df$with_columns(
x_lag = pl$col("x")$shift(1)$over("g", order_by = "t")
)
#> shape: (8, 4)
#> ┌─────┬─────┬──────┬───────┐
#> │ g ┆ t ┆ x ┆ x_lag │
#> │ --- ┆ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪══════╪═══════╡
#> │ 1.0 ┆ 1.0 ┆ 10.0 ┆ null │
#> │ 1.0 ┆ 2.0 ┆ 20.0 ┆ 10.0 │
#> │ 1.0 ┆ 3.0 ┆ 30.0 ┆ 20.0 │
#> │ 1.0 ┆ 4.0 ┆ 40.0 ┆ 30.0 │
#> │ 2.0 ┆ 4.0 ┆ 10.0 ┆ 40.0 │
#> │ 2.0 ┆ 1.0 ┆ 20.0 ┆ null │
#> │ 2.0 ┆ 2.0 ┆ 30.0 ┆ 20.0 │
#> │ 2.0 ┆ 3.0 ┆ 40.0 ┆ 30.0 │
#> └─────┴─────┴──────┴───────┘