Start a when-then-otherwise expression
Description
Always initiated with a `pl$when()$then()`, and optionally followed by chaining one or more `$when()$then()` statements.
An optional `$otherwise()` can be appended at the end. If not declared, a default of `$otherwise(NA)` is used.
Similar to `pl$coalesce`, the value from the first condition that evaluates to `TRUE` will be picked. If all conditions are `FALSE`, the `otherwise` value is picked.
Usage
pl$when(...)
Arguments
`...`: <dynamic-dots> Condition(s) that must be met in order to
apply the subsequent statement. Accepts one or more boolean expressions,
which are implicitly combined with `&`.
Details
Polars computes all expressions passed to when-then-otherwise in parallel and filters afterwards. This means each expression must be valid on its own, regardless of the conditions in the when-then-otherwise chain.
String inputs, e.g. `when("string")`, `then("string")` or `otherwise("string")`, are parsed as column names. `pl$lit()` can be used to create string values.
Value
A polars expression
Examples
library("polars")
# Below we add a column with the value 1 where column "foo" > 2, and the
# value 1 + column "bar" where it isn't.
df <- pl$DataFrame(foo = c(1, 3, 4), bar = c(3, 4, 0))
df$with_columns(
val = pl$when(pl$col("foo") > 2)$then(1)$otherwise(1 + pl$col("bar"))
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ 4.0 │
#> │ 3.0 ┆ 4.0 ┆ 1.0 │
#> │ 4.0 ┆ 0.0 ┆ 1.0 │
#> └─────┴─────┴─────┘
# Note that when-then always executes all expressions.
# The results are folded left to right, picking the then value from the first
# when condition that is true.
# If no when condition is true the otherwise value is picked.
df$with_columns(
when = pl$col("foo") > 2,
then = 1,
otherwise = 1 + pl$col("bar")
)$with_columns(
val = pl$when("when")$then("then")$otherwise("otherwise")
)
#> shape: (3, 6)
#> ┌─────┬─────┬───────┬──────┬───────────┬─────┐
#> │ foo ┆ bar ┆ when ┆ then ┆ otherwise ┆ val │
#> │ --- ┆ --- ┆ --- ┆ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ bool ┆ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═══════╪══════╪═══════════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ false ┆ 1.0 ┆ 4.0 ┆ 4.0 │
#> │ 3.0 ┆ 4.0 ┆ true ┆ 1.0 ┆ 5.0 ┆ 1.0 │
#> │ 4.0 ┆ 0.0 ┆ true ┆ 1.0 ┆ 1.0 ┆ 1.0 │
#> └─────┴─────┴───────┴──────┴───────────┴─────┘
# Strings are parsed as column names
df$with_columns(
val = pl$when(pl$col("foo") > 2)$then("foo")$otherwise("bar")
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ 3.0 │
#> │ 3.0 ┆ 4.0 ┆ 3.0 │
#> │ 4.0 ┆ 0.0 ┆ 4.0 │
#> └─────┴─────┴─────┘
# Use pl$lit() to create literal values
df$with_columns(
val = pl$when(pl$col("foo") > 2)$then(pl$lit("foo"))$otherwise(pl$lit("bar"))
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ bar │
#> │ 3.0 ┆ 4.0 ┆ foo │
#> │ 4.0 ┆ 0.0 ┆ foo │
#> └─────┴─────┴─────┘
# Multiple when-then statements can be chained.
df$with_columns(
val = pl$when(pl$col("foo") > 2)$
then(1)$
when(pl$col("bar") > 2)$
then(4)$
otherwise(-1)
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ 4.0 │
#> │ 3.0 ┆ 4.0 ┆ 1.0 │
#> │ 4.0 ┆ 0.0 ┆ 1.0 │
#> └─────┴─────┴─────┘
# The otherwise statement is optional and defaults to $otherwise(NA) if not given.
# This idiom is commonly used to null out values.
df$with_columns(pl$when(pl$col("foo") == 3)$then("bar"))
#> shape: (3, 2)
#> ┌─────┬──────┐
#> │ foo ┆ bar │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪══════╡
#> │ 1.0 ┆ null │
#> │ 3.0 ┆ 4.0 │
#> │ 4.0 ┆ null │
#> └─────┴──────┘
# Multiple predicates passed to when are combined with &
df$with_columns(
val = pl$when(pl$col("foo") > 2, pl$col("bar") < 3)$
then(pl$lit("Yes"))$
otherwise(pl$lit("No"))
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ No │
#> │ 3.0 ┆ 4.0 ┆ No │
#> │ 4.0 ┆ 0.0 ┆ Yes │
#> └─────┴─────┴─────┘
# Structs can be used as a way to return multiple values.
# Here we swap the "foo" and "bar" values when "foo" is greater than 2.
df$with_columns(
pl$when(pl$col("foo") > 2)$
then(pl$struct(foo = "bar", bar = "foo"))$
otherwise(pl$struct("foo", "bar"))$
struct$
unnest()
)
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ foo ┆ bar │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪═════╡
#> │ 1.0 ┆ 3.0 │
#> │ 4.0 ┆ 3.0 │
#> │ 0.0 ┆ 4.0 │
#> └─────┴─────┘
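# The output name of a when-then expression comes from the first then branch.
# Here we try to set all columns to 0 if any column contains a value less than 2.
# This fails: then(0) is a literal, so every output column is named "literal"
# and the names collide.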
tryCatch(
df$with_columns(
pl$when(pl$any_horizontal(pl$all() < 2))$then(0)$otherwise(pl$all())
),
error = function(e) e
)
#> <error/rlang_error>
#> Error in `df$with_columns()`:
#> ! Evaluation failed in `$with_columns()`.
#> Caused by error:
#> ! Evaluation failed in `$collect()`.
#> Caused by error:
#> ! the name 'literal' passed to `LazyFrame.with_columns` is duplicate
#>
#> It's possible that multiple expressions are returning the same default column name. If this is the case, try renaming the columns with `.alias("new_name")` to avoid duplicate column names.
#> ---
#> Backtrace:
#> ▆
#> 1. ├─base::tryCatch(...)
#> 2. │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 3. │ └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 4. │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 5. └─df$with_columns(pl$when(pl$any_horizontal(pl$all() < 2))$then(0)$otherwise(pl$all()))
#> 6. ├─polars:::wrap(self$lazy()$with_columns(...)$collect(`_eager` = TRUE)) at r-polars/R/dataframe-frame.R:418:3
#> 7. │ └─rlang::try_fetch(...) at r-polars/R/utils-wrap.R:3:3
#> 8. │ ├─base::tryCatch(...)
#> 9. │ │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 10. │ │ └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 11. │ │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 12. │ └─base::withCallingHandlers(...)
#> 13. └─self$lazy()$with_columns(...)$collect(`_eager` = TRUE) at r-polars/R/utils-wrap.R:3:3
#> 14. ├─polars:::wrap(...) at r-polars/R/lazyframe-frame.R:295:3
#> 15. │ └─rlang::try_fetch(...) at r-polars/R/utils-wrap.R:3:3
#> 16. │ ├─base::tryCatch(...)
#> 17. │ │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#> 18. │ │ └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#> 19. │ │ └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#> 20. │ └─base::withCallingHandlers(...)
#> 21. └─ldf$collect(engine) at r-polars/R/lazyframe-frame.R:332:5
#> 22. └─polars:::.savvy_wrap_PlRDataFrame(...) at r-polars/R/000-wrappers.R:3670:5
# $name$keep() can be used to give preference to the names of the column expression.
df$with_columns(
pl$when(pl$any_horizontal(pl$all() < 2))$then(0)$otherwise(pl$all())$name$keep()
)
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ foo ┆ bar │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪═════╡
#> │ 0.0 ┆ 0.0 │
#> │ 3.0 ┆ 4.0 │
#> │ 0.0 ┆ 0.0 │
#> └─────┴─────┘
# The logic could also be changed to move the column expression inside then.
df$with_columns(
pl$when(pl$any_horizontal(pl$all() < 2)$not())$then(pl$all())$otherwise(0)
)
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ foo ┆ bar │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪═════╡
#> │ 0.0 ┆ 0.0 │
#> │ 3.0 ┆ 4.0 │
#> │ 0.0 ┆ 0.0 │
#> └─────┴─────┘