Skip to content

Start a when-then-otherwise expression

Source code

Description

Always initiated with pl$when()$then(), optionally followed by one or more chained $when()$then() statements.

An optional $otherwise() can be appended at the end. If not declared, a default of $otherwise(NA) is used.

Similar to pl$coalesce, the value from the first condition that evaluates to TRUE will be picked. If all conditions are FALSE, the otherwise value is picked.

Usage

pl$when(...)

Arguments

...: <dynamic-dots> Condition(s) that must be met in order to apply the subsequent statement. Accepts one or more boolean expressions, which are implicitly combined with &.

Details

Polars computes all expressions passed to when-then-otherwise in parallel and filters afterwards. This means each expression must be valid on its own, regardless of the conditions in the when-then-otherwise chain.

String inputs, e.g. when("string"), then("string") or otherwise("string"), are parsed as column names. pl$lit() can be used to create literal string values.

Value

A polars expression

Examples

library("polars")

# Below we add a column with the value 1 where column "foo" > 2, and the
# value 1 + column "bar" where it isn't.
df <- pl$DataFrame(foo = c(1, 3, 4), bar = c(3, 4, 0))
df$with_columns(
  val = pl$when(pl$col("foo") > 2)$then(1)$otherwise(1 + pl$col("bar"))
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ 4.0 │
#> │ 3.0 ┆ 4.0 ┆ 1.0 │
#> │ 4.0 ┆ 0.0 ┆ 1.0 │
#> └─────┴─────┴─────┘
# Note that when-then always executes all expressions.
# The results are folded left to right, picking the then value from the first
# when condition that is true.
# If no when condition is true, the otherwise value is picked.
df$with_columns(
  when = pl$col("foo") > 2,
  then = 1,
  otherwise = 1 + pl$col("bar")
)$with_columns(
  val = pl$when("when")$then("then")$otherwise("otherwise")
)
#> shape: (3, 6)
#> ┌─────┬─────┬───────┬──────┬───────────┬─────┐
#> │ foo ┆ bar ┆ when  ┆ then ┆ otherwise ┆ val │
#> │ --- ┆ --- ┆ ---   ┆ ---  ┆ ---       ┆ --- │
#> │ f64 ┆ f64 ┆ bool  ┆ f64  ┆ f64       ┆ f64 │
#> ╞═════╪═════╪═══════╪══════╪═══════════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ false ┆ 1.0  ┆ 4.0       ┆ 4.0 │
#> │ 3.0 ┆ 4.0 ┆ true  ┆ 1.0  ┆ 5.0       ┆ 1.0 │
#> │ 4.0 ┆ 0.0 ┆ true  ┆ 1.0  ┆ 1.0       ┆ 1.0 │
#> └─────┴─────┴───────┴──────┴───────────┴─────┘
# Strings are parsed as column names
df$with_columns(
  val = pl$when(pl$col("foo") > 2)$then("foo")$otherwise("bar")
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ 3.0 │
#> │ 3.0 ┆ 4.0 ┆ 3.0 │
#> │ 4.0 ┆ 0.0 ┆ 4.0 │
#> └─────┴─────┴─────┘
# Use pl$lit() to create literal values
df$with_columns(
  val = pl$when(pl$col("foo") > 2)$then(pl$lit("foo"))$otherwise(pl$lit("bar"))
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ bar │
#> │ 3.0 ┆ 4.0 ┆ foo │
#> │ 4.0 ┆ 0.0 ┆ foo │
#> └─────┴─────┴─────┘
# Multiple when-then statements can be chained.
df$with_columns(
  val = pl$when(pl$col("foo") > 2)$
    then(1)$
    when(pl$col("bar") > 2)$
    then(4)$
    otherwise(-1)
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ f64 │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ 4.0 │
#> │ 3.0 ┆ 4.0 ┆ 1.0 │
#> │ 4.0 ┆ 0.0 ┆ 1.0 │
#> └─────┴─────┴─────┘
# The otherwise statement is optional and defaults to $otherwise(NA) if not given.
# This idiom is commonly used to null out values.
df$with_columns(pl$when(pl$col("foo") == 3)$then("bar"))
#> shape: (3, 2)
#> ┌─────┬──────┐
#> │ foo ┆ bar  │
#> │ --- ┆ ---  │
#> │ f64 ┆ f64  │
#> ╞═════╪══════╡
#> │ 1.0 ┆ null │
#> │ 3.0 ┆ 4.0  │
#> │ 4.0 ┆ null │
#> └─────┴──────┘
# Multiple predicates passed to when are combined with &
df$with_columns(
  val = pl$when(pl$col("foo") > 2, pl$col("bar") < 3)$
    then(pl$lit("Yes"))$
    otherwise(pl$lit("No"))
)
#> shape: (3, 3)
#> ┌─────┬─────┬─────┐
#> │ foo ┆ bar ┆ val │
#> │ --- ┆ --- ┆ --- │
#> │ f64 ┆ f64 ┆ str │
#> ╞═════╪═════╪═════╡
#> │ 1.0 ┆ 3.0 ┆ No  │
#> │ 3.0 ┆ 4.0 ┆ No  │
#> │ 4.0 ┆ 0.0 ┆ Yes │
#> └─────┴─────┴─────┘
# Structs can be used as a way to return multiple values.
# Here we swap the "foo" and "bar" values when "foo" is greater than 2.
df$with_columns(
  pl$when(pl$col("foo") > 2)$
    then(pl$struct(foo = "bar", bar = "foo"))$
    otherwise(pl$struct("foo", "bar"))$
    struct$
    unnest()
)
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ foo ┆ bar │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪═════╡
#> │ 1.0 ┆ 3.0 │
#> │ 4.0 ┆ 3.0 │
#> │ 0.0 ┆ 4.0 │
#> └─────┴─────┘
# The output name of a when-then expression comes from the first then branch.
# Here we try to set all columns to 0 if any column contains a value less than 2.
tryCatch(
  df$with_columns(
    pl$when(pl$any_horizontal(pl$all() < 2))$then(0)$otherwise(pl$all())
  ),
  error = function(e) e
)
#> <error/rlang_error>
#> Error in `df$with_columns()`:
#> ! Evaluation failed in `$with_columns()`.
#> Caused by error:
#> ! Evaluation failed in `$collect()`.
#> Caused by error:
#> ! the name 'literal' passed to `LazyFrame.with_columns` is duplicate
#> 
#> It's possible that multiple expressions are returning the same default column name. If this is the case, try renaming the columns with `.alias("new_name")` to avoid duplicate column names.
#> ---
#> Backtrace:
#>      ▆
#>   1. ├─base::tryCatch(...)
#>   2. │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>   3. │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>   4. │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>   5. └─df$with_columns(pl$when(pl$any_horizontal(pl$all() < 2))$then(0)$otherwise(pl$all()))
#>   6.   ├─polars:::wrap(self$lazy()$with_columns(...)$collect(`_eager` = TRUE)) at r-polars/R/dataframe-frame.R:418:3
#>   7.   │ └─rlang::try_fetch(...) at r-polars/R/utils-wrap.R:3:3
#>   8.   │   ├─base::tryCatch(...)
#>   9.   │   │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  10.   │   │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  11.   │   │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>  12.   │   └─base::withCallingHandlers(...)
#>  13.   └─self$lazy()$with_columns(...)$collect(`_eager` = TRUE) at r-polars/R/utils-wrap.R:3:3
#>  14.     ├─polars:::wrap(...) at r-polars/R/lazyframe-frame.R:295:3
#>  15.     │ └─rlang::try_fetch(...) at r-polars/R/utils-wrap.R:3:3
#>  16.     │   ├─base::tryCatch(...)
#>  17.     │   │ └─base (local) tryCatchList(expr, classes, parentenv, handlers)
#>  18.     │   │   └─base (local) tryCatchOne(expr, names, parentenv, handlers[[1L]])
#>  19.     │   │     └─base (local) doTryCatch(return(expr), name, parentenv, handler)
#>  20.     │   └─base::withCallingHandlers(...)
#>  21.     └─ldf$collect(engine) at r-polars/R/lazyframe-frame.R:332:5
#>  22.       └─polars:::.savvy_wrap_PlRDataFrame(...) at r-polars/R/000-wrappers.R:3670:5
# name$keep() could be used to give preference to the column expression.
df$with_columns(
  pl$when(pl$any_horizontal(pl$all() < 2))$then(0)$otherwise(pl$all())$name$keep()
)
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ foo ┆ bar │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪═════╡
#> │ 0.0 ┆ 0.0 │
#> │ 3.0 ┆ 4.0 │
#> │ 0.0 ┆ 0.0 │
#> └─────┴─────┘
# The logic could also be changed to move the column expression inside then.
df$with_columns(
  pl$when(pl$any_horizontal(pl$all() < 2)$not())$then(pl$all())$otherwise(0)
)
#> shape: (3, 2)
#> ┌─────┬─────┐
#> │ foo ┆ bar │
#> │ --- ┆ --- │
#> │ f64 ┆ f64 │
#> ╞═════╪═════╡
#> │ 0.0 ┆ 0.0 │
#> │ 3.0 ┆ 4.0 │
#> │ 0.0 ┆ 0.0 │
#> └─────┴─────┘